ruslanmv commited on
Commit
3f6ce08
1 Parent(s): e3e84e7
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -1,7 +1,29 @@
1
  FROM python:3.11-slim
2
  WORKDIR $HOME/app
3
  COPY . .
 
 
 
 
 
 
 
 
4
  RUN pip install -r requirements.txt
5
  VOLUME /data
6
  EXPOSE 23333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  CMD ["python", "-m", "apis.chat_api"]
 
1
  FROM python:3.11-slim
2
  WORKDIR $HOME/app
3
  COPY . .
4
+ RUN pip3 uninstall ffmpeg
5
+ RUN pip3 uninstall ffmpeg-python
6
+ RUN pip uninstall ffmpeg
7
+ RUN pip uninstall ffmpeg-python
8
+ RUN pip3 install ffmpeg
9
+ RUN pip3 install ffmpeg-python
10
+ RUN pip install ffmpeg
11
+ RUN pip install ffmpeg-python
12
  RUN pip install -r requirements.txt
13
  VOLUME /data
14
  EXPOSE 23333
15
+ RUN useradd -m -u 1000 user
16
+ USER user
17
+ ENV HOME=/home/user \
18
+ PATH=/home/user/.local/bin:$PATH
19
+
20
+ WORKDIR $HOME/app
21
+
22
+ COPY --chown=user . $HOME/app
23
+ RUN mkdir -p $HOME/app/models
24
+ RUN chmod 777 $HOME/app/models
25
+ ENV MODELS_PATH=$HOME/app/models
26
+ RUN mkdir -p $HOME/app/uploads
27
+ RUN chmod 777 $HOME/app/uploads
28
+
29
  CMD ["python", "-m", "apis.chat_api"]
README.md CHANGED
@@ -10,19 +10,20 @@ app_port: 23333
10
  ## HF-LLM-API-COLLECTION
11
  Huggingface LLM Inference API in OpenAI message format.
12
 
13
- Project link: https://github.com/ruslanmv/hf-llm-api-collection
14
-
15
  ## Features
16
 
17
- - Available Models (2024/01/22): [#5](https://github.com/ruslanmv/hf-llm-api-collection/issues/5)
18
- - `mistral-7b`, `mixtral-8x7b`, `nous-mixtral-8x7b`
19
- - Adaptive prompt templates for different models
 
20
  - Support OpenAI API format
21
- - Enable api endpoint via official `openai-python` package
22
- - Support both stream and no-stream response
23
- - Support API Key via both HTTP auth header and env varible [#4](https://github.com/ruslanmv/hf-llm-api-collection/issues/4)
24
  - Docker deployment
25
 
 
 
 
26
  ## Run API service
27
 
28
  ### Run in Command Line
@@ -62,17 +63,14 @@ sudo docker run -p 23333:23333 --env http_proxy="http://<server>:<port>" hf-llm-
62
 
63
  ### Using `openai-python`
64
 
65
- See: [`examples/chat_with_openai.py`](https://github.com/ruslanmv/hf-llm-api-collection/blob/main/examples/chat_with_openai.py)
66
 
67
  ```py
68
  from openai import OpenAI
69
 
70
  # If runnning this service with proxy, you might need to unset `http(s)_proxy`.
71
  base_url = "http://127.0.0.1:23333"
72
- # Your own HF_TOKEN
73
- api_key = "hf_xxxxxxxxxxxxxxxx"
74
- # use below as non-auth user
75
- # api_key = "sk-xxx"
76
 
77
  client = OpenAI(base_url=base_url, api_key=api_key)
78
  response = client.chat.completions.create(
@@ -97,7 +95,7 @@ for chunk in response:
97
 
98
  ### Using post requests
99
 
100
- See: [`examples/chat_with_post.py`](https://github.com/ruslanmv/hf-llm-api-collection/blob/main/examples/chat_with_post.py)
101
 
102
 
103
  ```py
@@ -108,11 +106,7 @@ import re
108
 
109
  # If runnning this service with proxy, you might need to unset `http(s)_proxy`.
110
  chat_api = "http://127.0.0.1:23333"
111
- # Your own HF_TOKEN
112
- api_key = "hf_xxxxxxxxxxxxxxxx"
113
- # use below as non-auth user
114
- # api_key = "sk-xxx"
115
-
116
  requests_headers = {}
117
  requests_payload = {
118
  "model": "mixtral-8x7b",
 
10
  ## HF-LLM-API-COLLECTION
11
  Huggingface LLM Inference API in OpenAI message format.
12
 
 
 
13
  ## Features
14
 
15
+ Implemented:
16
+
17
+ - Support Models
18
+ - `mixtral-8x7b`, `mistral-7b`
19
  - Support OpenAI API format
20
+ - Can use api endpoint via official `openai-python` package
21
+ - Stream response
 
22
  - Docker deployment
23
 
24
+ 🔨 In progress:
25
+ - [x] Support more models
26
+
27
  ## Run API service
28
 
29
  ### Run in Command Line
 
63
 
64
  ### Using `openai-python`
65
 
66
+ See: [examples/chat_with_openai.py](https://github.com/ruslanmv/hf-llm-api-collection/blob/main/examples/chat_with_openai.py)
67
 
68
  ```py
69
  from openai import OpenAI
70
 
71
  # If runnning this service with proxy, you might need to unset `http(s)_proxy`.
72
  base_url = "http://127.0.0.1:23333"
73
+ api_key = "sk-xxxxx"
 
 
 
74
 
75
  client = OpenAI(base_url=base_url, api_key=api_key)
76
  response = client.chat.completions.create(
 
95
 
96
  ### Using post requests
97
 
98
+ See: [examples/chat_with_post.py](https://github.com/ruslanmv/hf-llm-api-collection/blob/main/examples/chat_with_post.py)
99
 
100
 
101
  ```py
 
106
 
107
  # If runnning this service with proxy, you might need to unset `http(s)_proxy`.
108
  chat_api = "http://127.0.0.1:23333"
109
+ api_key = "sk-xxxxx"
 
 
 
 
110
  requests_headers = {}
111
  requests_payload = {
112
  "model": "mixtral-8x7b",
apis/chat_api.py CHANGED
@@ -1,21 +1,40 @@
1
  import argparse
2
- import markdown2
3
- import os
4
- import sys
5
  import uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- from pathlib import Path
8
- from fastapi import FastAPI, Depends
9
- from fastapi.responses import HTMLResponse
10
- from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
11
  from pydantic import BaseModel, Field
12
- from typing import Union
13
- from sse_starlette.sse import EventSourceResponse, ServerSentEvent
14
  from utils.logger import logger
15
  from networks.message_streamer import MessageStreamer
16
  from messagers.message_composer import MessageComposer
17
- from mocks.stream_chat_mocker import stream_chat_mock
18
-
 
 
 
 
19
 
20
  class ChatAPIApp:
21
  def __init__(self):
@@ -27,148 +46,229 @@ class ChatAPIApp:
27
  )
28
  self.setup_routes()
29
 
30
- def get_available_models(self):
31
- # https://platform.openai.com/docs/api-reference/models/list
32
- # ANCHOR[id=available-models]: Available models
33
- self.available_models = {
34
- "object": "list",
35
- "data": [
36
- {
37
- "id": "mixtral-8x7b",
38
- "description": "[mistralai/Mixtral-8x7B-Instruct-v0.1]: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
39
- "object": "model",
40
- "created": 1700000000,
41
- "owned_by": "mistralai",
42
- },
43
- {
44
- "id": "mistral-7b",
45
- "description": "[mistralai/Mistral-7B-Instruct-v0.2]: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
46
- "object": "model",
47
- "created": 1700000000,
48
- "owned_by": "mistralai",
49
- },
50
- {
51
- "id": "nous-mixtral-8x7b",
52
- "description": "[NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO]: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
53
- "object": "model",
54
- "created": 1700000000,
55
- "owned_by": "NousResearch",
56
- },
57
- ],
58
- }
59
  return self.available_models
60
 
61
- def extract_api_key(
62
- credentials: HTTPAuthorizationCredentials = Depends(
63
- HTTPBearer(auto_error=False)
64
- ),
65
- ):
66
- api_key = None
67
- if credentials:
68
- api_key = credentials.credentials
69
- else:
70
- api_key = os.getenv("HF_TOKEN")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- if api_key:
73
- if api_key.startswith("hf_"):
74
- return api_key
75
- else:
76
- logger.warn(f"Invalid HF Token!")
77
  else:
78
- logger.warn("Not provide HF Token!")
79
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- class ChatCompletionsPostItem(BaseModel):
82
  model: str = Field(
83
- default="mixtral-8x7b",
84
- description="(str) `mixtral-8x7b`",
85
  )
86
- messages: list = Field(
87
- default=[{"role": "user", "content": "Hello, who are you?"}],
88
- description="(list) Messages",
89
  )
90
- temperature: Union[float, None] = Field(
91
- default=0.5,
92
- description="(float) Temperature",
93
  )
94
- top_p: Union[float, None] = Field(
95
- default=0.95,
96
- description="(float) top p",
97
- )
98
- max_tokens: Union[int, None] = Field(
99
- default=-1,
100
- description="(int) Max tokens",
101
- )
102
- use_cache: bool = Field(
103
- default=False,
104
- description="(bool) Use cache",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  )
106
- stream: bool = Field(
107
- default=True,
108
- description="(bool) Stream",
 
 
 
 
 
 
 
 
 
 
109
  )
110
 
111
- def chat_completions(
112
- self, item: ChatCompletionsPostItem, api_key: str = Depends(extract_api_key)
113
- ):
114
- streamer = MessageStreamer(model=item.model)
115
- composer = MessageComposer(model=item.model)
116
- composer.merge(messages=item.messages)
117
- # streamer.chat = stream_chat_mock
118
-
119
- stream_response = streamer.chat_response(
120
- prompt=composer.merged_str,
121
- temperature=item.temperature,
122
- top_p=item.top_p,
123
- max_new_tokens=item.max_tokens,
124
- api_key=api_key,
125
- use_cache=item.use_cache,
126
  )
127
- if item.stream:
128
- event_source_response = EventSourceResponse(
129
- streamer.chat_return_generator(stream_response),
130
- media_type="text/event-stream",
131
- ping=2000,
132
- ping_message_factory=lambda: ServerSentEvent(**{"comment": ""}),
133
- )
134
- return event_source_response
135
- else:
136
- data_response = streamer.chat_return_dict(stream_response)
137
- return data_response
138
-
139
- def get_readme(self):
140
- readme_path = Path(__file__).parents[1] / "README.md"
141
- with open(readme_path, "r", encoding="utf-8") as rf:
142
- readme_str = rf.read()
143
- readme_html = markdown2.markdown(
144
- readme_str, extras=["table", "fenced-code-blocks", "highlightjs-lang"]
145
  )
146
- return readme_html
147
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  def setup_routes(self):
149
- for prefix in ["", "/v1", "/api", "/api/v1"]:
150
- if prefix in ["/api/v1"]:
151
- include_in_schema = True
152
- else:
153
- include_in_schema = False
154
-
155
  self.app.get(
156
- prefix + "/models",
157
- summary="Get available models",
158
- include_in_schema=include_in_schema,
159
- )(self.get_available_models)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  self.app.post(
162
- prefix + "/chat/completions",
163
- summary="Chat completions in conversation session",
164
- include_in_schema=include_in_schema,
165
- )(self.chat_completions)
166
- self.app.get(
167
- "/readme",
168
- summary="README of HF LLM API",
169
- response_class=HTMLResponse,
170
- include_in_schema=False,
171
- )(self.get_readme)
172
 
173
 
174
  class ArgParser(argparse.ArgumentParser):
@@ -203,6 +303,77 @@ class ArgParser(argparse.ArgumentParser):
203
 
204
  app = ChatAPIApp().app
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  if __name__ == "__main__":
207
  args = ArgParser().args
208
  if args.dev:
@@ -211,4 +382,4 @@ if __name__ == "__main__":
211
  uvicorn.run("__main__:app", host=args.server, port=args.port, reload=False)
212
 
213
  # python -m apis.chat_api # [Docker] on product mode
214
- # python -m apis.chat_api -d # [Dev] on develop mode
 
1
  import argparse
 
 
 
2
  import uvicorn
3
+ import sys
4
+ import os
5
+ import io
6
+ from transformers import M2M100Tokenizer, M2M100ForConditionalGeneration
7
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
8
+ import time
9
+ import json
10
+ from typing import List
11
+ import torch
12
+ import logging
13
+ import string
14
+ import random
15
+ import base64
16
+ import re
17
+ import requests
18
+ from utils.enver import enver
19
+ import shutil
20
+ import tempfile
21
+ import numpy as np
22
 
23
+
24
+ from fastapi import FastAPI, Response, File, UploadFile, Form
25
+ from fastapi.encoders import jsonable_encoder
26
+ from fastapi.responses import JSONResponse, StreamingResponse
27
  from pydantic import BaseModel, Field
28
+ from sse_starlette.sse import EventSourceResponse
 
29
  from utils.logger import logger
30
  from networks.message_streamer import MessageStreamer
31
  from messagers.message_composer import MessageComposer
32
+ from googletrans import Translator
33
+ from io import BytesIO
34
+ from gtts import gTTS
35
+ from fastapi.middleware.cors import CORSMiddleware
36
+ from pathlib import Path
37
+ from tempfile import NamedTemporaryFile
38
 
39
  class ChatAPIApp:
40
  def __init__(self):
 
46
  )
47
  self.setup_routes()
48
 
49
+ def get_available_langs(self):
50
+ f = open('apis/lang_name.json', "r")
51
+ self.available_models = json.loads(f.read())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return self.available_models
53
 
54
+ class TranslateCompletionsPostItem(BaseModel):
55
+ from_language: str = Field(
56
+ default="en",
57
+ description="(str) `Detect`",
58
+ )
59
+ to_language: str = Field(
60
+ default="fa",
61
+ description="(str) `en`",
62
+ )
63
+ input_text: str = Field(
64
+ default="Hello",
65
+ description="(str) `Text for translate`",
66
+ )
67
+
68
+
69
+ def translate_completions(self, item: TranslateCompletionsPostItem):
70
+ translator = Translator()
71
+ f = open('apis/lang_name.json', "r")
72
+ available_langs = json.loads(f.read())
73
+ from_lang = 'en'
74
+ to_lang = 'en'
75
+ for lang_item in available_langs:
76
+ if item.to_language == lang_item['code']:
77
+ to_lang = item.to_language
78
+ break
79
+
80
+
81
+ translated = translator.translate(item.input_text, dest=to_lang)
82
+ item_response = {
83
+ "from_language": translated.src,
84
+ "to_language": translated.dest,
85
+ "text": item.input_text,
86
+ "translate": translated.text
87
+ }
88
+ json_compatible_item_data = jsonable_encoder(item_response)
89
+ return JSONResponse(content=json_compatible_item_data)
90
+
91
+ def translate_ai_completions(self, item: TranslateCompletionsPostItem):
92
+ translator = Translator()
93
+ #print(os.getcwd())
94
+ f = open('apis/lang_name.json', "r")
95
+ available_langs = json.loads(f.read())
96
+ from_lang = 'en'
97
+ to_lang = 'en'
98
+ for lang_item in available_langs:
99
+ if item.to_language == lang_item['code']:
100
+ to_lang = item.to_language
101
+ if item.from_language == lang_item['code']:
102
+ from_lang = item.from_language
103
+
104
+ if to_lang == 'auto':
105
+ to_lang = 'en'
106
 
107
+ if from_lang == 'auto':
108
+ from_lang = translator.detect(item.input_text).lang
109
+
110
+ if torch.cuda.is_available():
111
+ device = torch.device("cuda:0")
112
  else:
113
+ device = torch.device("cpu")
114
+ logging.warning("GPU not found, using CPU, translation will be very slow.")
115
+
116
+ time_start = time.time()
117
+ #TRANSFORMERS_CACHE
118
+ pretrained_model = "facebook/m2m100_1.2B"
119
+ cache_dir = "models/"
120
+ tokenizer = M2M100Tokenizer.from_pretrained(pretrained_model, cache_dir=cache_dir)
121
+ model = M2M100ForConditionalGeneration.from_pretrained(
122
+ pretrained_model, cache_dir=cache_dir
123
+ ).to(device)
124
+ model.eval()
125
+
126
+ tokenizer.src_lang = from_lang
127
+ with torch.no_grad():
128
+ encoded_input = tokenizer(item.input_text, return_tensors="pt").to(device)
129
+ generated_tokens = model.generate(
130
+ **encoded_input, forced_bos_token_id=tokenizer.get_lang_id(to_lang)
131
+ )
132
+ translated_text = tokenizer.batch_decode(
133
+ generated_tokens, skip_special_tokens=True
134
+ )[0]
135
+
136
+ time_end = time.time()
137
+ translated = translated_text
138
+ item_response = {
139
+ "from_language": from_lang,
140
+ "to_language": to_lang,
141
+ "text": item.input_text,
142
+ "translate": translated,
143
+ "start": str(time_start),
144
+ "end": str(time_end)
145
+ }
146
+ json_compatible_item_data = jsonable_encoder(item_response)
147
+ return JSONResponse(content=json_compatible_item_data)
148
 
149
+ class TranslateAiPostItem(BaseModel):
150
  model: str = Field(
151
+ default="t5-base",
152
+ description="(str) `Model Name`",
153
  )
154
+ from_language: str = Field(
155
+ default="en",
156
+ description="(str) `translate from`",
157
  )
158
+ to_language: str = Field(
159
+ default="fa",
160
+ description="(str) `translate to`",
161
  )
162
+ input_text: str = Field(
163
+ default="Hello",
164
+ description="(str) `Text for translate`",
165
+ )
166
+ def ai_translate(self, item:TranslateAiPostItem):
167
+ MODEL_MAP = {
168
+ "t5-base": "t5-base",
169
+ "t5-small": "t5-small",
170
+ "t5-large": "t5-large",
171
+ "t5-3b": "t5-3b",
172
+ "mbart-large-50-many-to-many-mmt": "facebook/mbart-large-50-many-to-many-mmt",
173
+ "nllb-200-distilled-600M": "facebook/nllb-200-distilled-600M",
174
+ "madlad400-3b-mt": "jbochi/madlad400-3b-mt",
175
+ "default": "t5-base",
176
+ }
177
+ if item.model in MODEL_MAP.keys():
178
+ target_model = item.model
179
+ else:
180
+ target_model = "default"
181
+
182
+ real_name = MODEL_MAP[target_model]
183
+ read_model = AutoModelForSeq2SeqLM.from_pretrained(real_name)
184
+ tokenizer = AutoTokenizer.from_pretrained(real_name)
185
+ #translator = pipeline("translation", model=read_model, tokenizer=tokenizer, src_lang=item.from_language, tgt_lang=item.to_language)
186
+ translate_query = (
187
+ f"translation_{item.from_language}_to_{item.to_language}"
188
  )
189
+ translator = pipeline(translate_query)
190
+ result = translator(item.input_text)
191
+
192
+ item_response = {
193
+ "statue": 200,
194
+ "result": result,
195
+ }
196
+ json_compatible_item_data = jsonable_encoder(item_response)
197
+ return JSONResponse(content=json_compatible_item_data)
198
+ class DetectLanguagePostItem(BaseModel):
199
+ input_text: str = Field(
200
+ default="Hello, how are you?",
201
+ description="(str) `Text for detection`",
202
  )
203
 
204
+ def detect_language(self, item: DetectLanguagePostItem):
205
+ translator = Translator()
206
+ detected = translator.detect(item.input_text)
207
+
208
+ item_response = {
209
+ "lang": detected.lang,
210
+ "confidence": detected.confidence,
211
+ }
212
+ json_compatible_item_data = jsonable_encoder(item_response)
213
+ return JSONResponse(content=json_compatible_item_data)
214
+
215
+ class TTSPostItem(BaseModel):
216
+ input_text: str = Field(
217
+ default="Hello",
218
+ description="(str) `Text for TTS`",
219
  )
220
+ from_language: str = Field(
221
+ default="en",
222
+ description="(str) `TTS language`",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  )
224
+
225
+ def text_to_speech(self, item: TTSPostItem):
226
+ try:
227
+ audioobj = gTTS(text = item.input_text, lang = item.from_language, slow = False)
228
+ fileName = ''.join(random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10));
229
+ fileName = fileName + ".mp3";
230
+ mp3_fp = BytesIO()
231
+ #audioobj.save(fileName)
232
+ #audioobj.write_to_fp(mp3_fp)
233
+ #buffer = bytearray(mp3_fp.read())
234
+ #base64EncodedStr = base64.encodebytes(buffer)
235
+ #mp3_fp.read()
236
+ #return Response(content=mp3_fp.tell(), media_type="audio/mpeg")
237
+ return StreamingResponse(audioobj.stream())
238
+ except:
239
+ item_response = {
240
+ "status": 400
241
+ }
242
+ json_compatible_item_data = jsonable_encoder(item_response)
243
+ return JSONResponse(content=json_compatible_item_data)
244
+
245
+
246
  def setup_routes(self):
247
+ for prefix in ["", "/v1"]:
 
 
 
 
 
248
  self.app.get(
249
+ prefix + "/langs",
250
+ summary="Get available languages",
251
+ )(self.get_available_langs)
252
+
253
+ self.app.post(
254
+ prefix + "/translate",
255
+ summary="translate text",
256
+ )(self.translate_completions)
257
+
258
+ self.app.post(
259
+ prefix + "/translate/ai",
260
+ summary="translate text with ai",
261
+ )(self.translate_ai_completions)
262
+
263
+ self.app.post(
264
+ prefix + "/detect",
265
+ summary="detect language",
266
+ )(self.detect_language)
267
 
268
  self.app.post(
269
+ prefix + "/tts",
270
+ summary="text to speech",
271
+ )(self.text_to_speech)
 
 
 
 
 
 
 
272
 
273
 
274
  class ArgParser(argparse.ArgumentParser):
 
303
 
304
  app = ChatAPIApp().app
305
 
306
+ app.add_middleware(
307
+ CORSMiddleware,
308
+ allow_origins=["*"],
309
+ allow_credentials=True,
310
+ allow_methods=["*"],
311
+ allow_headers=["*"],
312
+ )
313
+ @app.post("/transcribe")
314
+ async def whisper_transcribe(
315
+ audio_file: UploadFile = File(description="Audio file for transcribe"),
316
+ language: str = Form(),
317
+ model: str = Form(),
318
+ ):
319
+ MODEL_MAP = {
320
+ "whisper-small": "openai/whisper-small",
321
+ "whisper-medium": "openai/whisper-medium",
322
+ "whisper-large": "openai/whisper-large",
323
+ "default": "openai/whisper-small",
324
+ }
325
+ AUDIO_MAP = {
326
+ "audio/wav": "audio/wav",
327
+ "audio/mpeg": "audio/mpeg",
328
+ "audio/x-flac": "audio/x-flac",
329
+ }
330
+ item_response = {
331
+ "statue": 200,
332
+ "result": "",
333
+ "start": 0,
334
+ "end": 0
335
+ }
336
+ if audio_file.content_type in AUDIO_MAP.keys():
337
+ if model in MODEL_MAP.keys():
338
+ target_model = model
339
+ else:
340
+ target_model = "default"
341
+
342
+ real_name = MODEL_MAP[target_model]
343
+ device = 0 if torch.cuda.is_available() else "cpu"
344
+ pipe = pipeline(
345
+ task="automatic-speech-recognition",
346
+ model=real_name,
347
+ chunk_length_s=30,
348
+ device=device,
349
+ )
350
+ time_start = time.time()
351
+ pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language=language, task="transcribe")
352
+ try:
353
+ suffix = Path(audio_file.filename).suffix
354
+ with NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
355
+ shutil.copyfileobj(audio_file.file, tmp)
356
+ tmp_path = Path(tmp.name)
357
+ finally:
358
+ audio_file.file.close()
359
+ #file_data = await audio_file.read()
360
+ # rv = data.encode('utf-8')
361
+ #rv = base64.b64encode(file_data).decode()
362
+ #print(rv, "rvrvrvrvr")
363
+ audio_data = np.fromfile(tmp_path)
364
+ text = pipe(audio_data)["text"]
365
+ time_end = time.time()
366
+ item_response["status"] = 200
367
+ item_response["result"] = text
368
+ item_response["start"] = time_start
369
+ item_response["end"] = time_end
370
+ else:
371
+ item_response["status"] = 400
372
+ item_response["result"] = 'Acceptable files: audio/wav,audio/mpeg,audio/x-flac'
373
+
374
+
375
+ return item_response
376
+
377
  if __name__ == "__main__":
378
  args = ArgParser().args
379
  if args.dev:
 
382
  uvicorn.run("__main__:app", host=args.server, port=args.port, reload=False)
383
 
384
  # python -m apis.chat_api # [Docker] on product mode
385
+ # python -m apis.chat_api -d # [Dev] on develop mode
apis/lang_name.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"code":"auto","name":"Detect language"},{"code":"fa","name":"Persian"},{"code":"en","name":"English"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese (Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian 
Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon (Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots 
Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ku","name":"Kurdish (Kurmanji)"},{"code":"en","name":"English"},{"code":"fa","name":"Persian"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese 
(Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)(recently used language)"},{"code":"ckb","name":"Kurdish (Sorani)(recently used 
language)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon (Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots 
Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"},{"code":"auto","name":"Detect language"},{"code":"fa","name":"Persian"},{"code":"en","name":"English"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese 
(Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon 
(Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ku","name":"Kurdish 
(Kurmanji)"},{"code":"en","name":"English"},{"code":"fa","name":"Persian"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese (Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian 
Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)(recently used language)"},{"code":"ckb","name":"Kurdish (Sorani)(recently used language)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon (Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots 
Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"},{"code":"auto","name":"Detect language"},{"code":"fa","name":"Persian"},{"code":"en","name":"English"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese 
(Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon 
(Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ku","name":"Kurdish 
(Kurmanji)"},{"code":"en","name":"English"},{"code":"fa","name":"Persian"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese (Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian 
Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)(recently used language)"},{"code":"ckb","name":"Kurdish (Sorani)(recently used language)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon (Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots 
Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"},{"code":"auto","name":"Detect language"},{"code":"fa","name":"Persian"},{"code":"en","name":"English"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese 
(Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon 
(Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"},{"code":"ckb","name":"Kurdish (Sorani)"},{"code":"ku","name":"Kurdish 
(Kurmanji)"},{"code":"en","name":"English"},{"code":"fa","name":"Persian"},{"code":"af","name":"Afrikaans"},{"code":"sq","name":"Albanian"},{"code":"am","name":"Amharic"},{"code":"ar","name":"Arabic"},{"code":"hy","name":"Armenian"},{"code":"as","name":"Assamese"},{"code":"ay","name":"Aymara"},{"code":"az","name":"Azerbaijani"},{"code":"bm","name":"Bambara"},{"code":"eu","name":"Basque"},{"code":"be","name":"Belarusian"},{"code":"bn","name":"Bengali"},{"code":"bho","name":"Bhojpuri"},{"code":"bs","name":"Bosnian"},{"code":"bg","name":"Bulgarian"},{"code":"ca","name":"Catalan"},{"code":"ceb","name":"Cebuano"},{"code":"ny","name":"Chichewa"},{"code":"zh-CN","name":"Chinese (Simplified)"},{"code":"zh-TW","name":"Chinese (Traditional)"},{"code":"co","name":"Corsican"},{"code":"hr","name":"Croatian"},{"code":"cs","name":"Czech"},{"code":"da","name":"Danish"},{"code":"dv","name":"Dhivehi"},{"code":"doi","name":"Dogri"},{"code":"nl","name":"Dutch"},{"code":"en","name":"English(recently used language)"},{"code":"eo","name":"Esperanto"},{"code":"et","name":"Estonian"},{"code":"ee","name":"Ewe"},{"code":"tl","name":"Filipino"},{"code":"fi","name":"Finnish"},{"code":"fr","name":"French"},{"code":"fy","name":"Frisian"},{"code":"gl","name":"Galician"},{"code":"ka","name":"Georgian"},{"code":"de","name":"German"},{"code":"el","name":"Greek"},{"code":"gn","name":"Guarani"},{"code":"gu","name":"Gujarati"},{"code":"ht","name":"Haitian 
Creole"},{"code":"ha","name":"Hausa"},{"code":"haw","name":"Hawaiian"},{"code":"iw","name":"Hebrew"},{"code":"hi","name":"Hindi"},{"code":"hmn","name":"Hmong"},{"code":"hu","name":"Hungarian"},{"code":"is","name":"Icelandic"},{"code":"ig","name":"Igbo"},{"code":"ilo","name":"Ilocano"},{"code":"id","name":"Indonesian"},{"code":"ga","name":"Irish"},{"code":"it","name":"Italian"},{"code":"ja","name":"Japanese"},{"code":"jw","name":"Javanese"},{"code":"kn","name":"Kannada"},{"code":"kk","name":"Kazakh"},{"code":"km","name":"Khmer"},{"code":"rw","name":"Kinyarwanda"},{"code":"gom","name":"Konkani"},{"code":"ko","name":"Korean"},{"code":"kri","name":"Krio"},{"code":"ku","name":"Kurdish (Kurmanji)(recently used language)"},{"code":"ckb","name":"Kurdish (Sorani)(recently used language)"},{"code":"ky","name":"Kyrgyz"},{"code":"lo","name":"Lao"},{"code":"la","name":"Latin"},{"code":"lv","name":"Latvian"},{"code":"ln","name":"Lingala"},{"code":"lt","name":"Lithuanian"},{"code":"lg","name":"Luganda"},{"code":"lb","name":"Luxembourgish"},{"code":"mk","name":"Macedonian"},{"code":"mai","name":"Maithili"},{"code":"mg","name":"Malagasy"},{"code":"ms","name":"Malay"},{"code":"ml","name":"Malayalam"},{"code":"mt","name":"Maltese"},{"code":"mi","name":"Maori"},{"code":"mr","name":"Marathi"},{"code":"mni-Mtei","name":"Meiteilon (Manipuri)"},{"code":"lus","name":"Mizo"},{"code":"mn","name":"Mongolian"},{"code":"my","name":"Myanmar (Burmese)"},{"code":"ne","name":"Nepali"},{"code":"no","name":"Norwegian"},{"code":"or","name":"Odia (Oriya)"},{"code":"om","name":"Oromo"},{"code":"ps","name":"Pashto"},{"code":"fa","name":"Persian(recently used language)"},{"code":"pl","name":"Polish"},{"code":"pt","name":"Portuguese"},{"code":"pa","name":"Punjabi"},{"code":"qu","name":"Quechua"},{"code":"ro","name":"Romanian"},{"code":"ru","name":"Russian"},{"code":"sm","name":"Samoan"},{"code":"sa","name":"Sanskrit"},{"code":"gd","name":"Scots 
Gaelic"},{"code":"nso","name":"Sepedi"},{"code":"sr","name":"Serbian"},{"code":"st","name":"Sesotho"},{"code":"sn","name":"Shona"},{"code":"sd","name":"Sindhi"},{"code":"si","name":"Sinhala"},{"code":"sk","name":"Slovak"},{"code":"sl","name":"Slovenian"},{"code":"so","name":"Somali"},{"code":"es","name":"Spanish"},{"code":"su","name":"Sundanese"},{"code":"sw","name":"Swahili"},{"code":"sv","name":"Swedish"},{"code":"tg","name":"Tajik"},{"code":"ta","name":"Tamil"},{"code":"tt","name":"Tatar"},{"code":"te","name":"Telugu"},{"code":"th","name":"Thai"},{"code":"ti","name":"Tigrinya"},{"code":"ts","name":"Tsonga"},{"code":"tr","name":"Turkish"},{"code":"tk","name":"Turkmen"},{"code":"ak","name":"Twi"},{"code":"uk","name":"Ukrainian"},{"code":"ur","name":"Urdu"},{"code":"ug","name":"Uyghur"},{"code":"uz","name":"Uzbek"},{"code":"vi","name":"Vietnamese"},{"code":"cy","name":"Welsh"},{"code":"xh","name":"Xhosa"},{"code":"yi","name":"Yiddish"},{"code":"yo","name":"Yoruba"},{"code":"zu","name":"Zulu"}]
apis/models/__init__.py ADDED
File without changes
examples/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ #source
messagers/message_composer.py CHANGED
@@ -1,26 +1,16 @@
1
  import re
2
  from pprint import pprint
3
- from utils.logger import logger
4
 
5
 
6
  class MessageComposer:
7
- # LINK - apis/chat_api.py#available-models
8
- AVALAIBLE_MODELS = [
9
- "mixtral-8x7b",
10
- "mistral-7b",
11
- "openchat-3.5",
12
- "nous-mixtral-8x7b",
13
- ]
14
 
15
  def __init__(self, model: str = None):
16
- if model in self.AVALAIBLE_MODELS:
17
- self.model = model
18
- else:
19
- self.model = "mixtral-8x7b"
20
- self.system_roles = ["system"]
21
  self.inst_roles = ["user", "system", "inst"]
22
  self.answer_roles = ["assistant", "bot", "answer"]
23
- self.default_role = "user"
24
 
25
  def concat_messages_by_role(self, messages):
26
  def is_same_role(role1, role2):
@@ -50,86 +40,37 @@ class MessageComposer:
50
  return concat_messages
51
 
52
  def merge(self, messages) -> str:
53
- # Mistral and Mixtral:
54
- # <s> [INST] Instruction [/INST] Model answer </s> [INST] Follow-up instruction [/INST]
55
-
56
- # OpenChat:
57
- # GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi<|end_of_turn|>GPT4 Correct User: How are you today?<|end_of_turn|>GPT4 Correct Assistant:
58
 
59
- # Nous Mixtral:
60
- # <|im_start|>system
61
- # You are "Hermes 2".<|im_end|>
62
- # <|im_start|>user
63
- # Hello, who are you?<|im_end|>
64
- # <|im_start|>assistant
65
-
66
- self.messages = messages
67
  self.merged_str = ""
68
-
69
- # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1#instruction-format
70
- if self.model in ["mixtral-8x7b", "mistral-7b"]:
71
- self.messages = self.concat_messages_by_role(messages)
72
- self.cached_str = ""
73
- for message in self.messages:
74
- role = message["role"]
75
- content = message["content"]
76
- if role in self.inst_roles:
77
- self.cached_str = f"[INST] {content} [/INST]"
78
- elif role in self.answer_roles:
79
- self.merged_str += f"<s> {self.cached_str} {content} </s>\n"
80
- self.cached_str = ""
81
- else:
82
- self.cached_str = f"[INST] {content} [/INST]"
83
- if self.cached_str:
84
- self.merged_str += f"{self.cached_str}"
85
- # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
86
- elif self.model in ["nous-mixtral-8x7b"]:
87
- self.merged_str_list = []
88
- for message in self.messages:
89
- role = message["role"]
90
- content = message["content"]
91
- if role not in ["system", "user", "assistant"]:
92
- role = self.default_role
93
- message_line = f"<|im_start|>{role}\n{content}<|im_end|>"
94
- self.merged_str_list.append(message_line)
95
- self.merged_str_list.append("<|im_start|>assistant")
96
- self.merged_str = "\n".join(self.merged_str_list)
97
- # https://huggingface.co/openchat/openchat-3.5-0106
98
- elif self.model in ["openchat-3.5"]:
99
- self.messages = self.concat_messages_by_role(messages)
100
- self.merged_str_list = []
101
- self.end_of_turn = "<|end_of_turn|>"
102
- for message in self.messages:
103
- role = message["role"]
104
- content = message["content"]
105
- if role in self.inst_roles:
106
- self.merged_str_list.append(
107
- f"GPT4 Correct User:\n{content}{self.end_of_turn}"
108
- )
109
- elif role in self.answer_roles:
110
- self.merged_str_list.append(
111
- f"GPT4 Correct Assistant:\n{content}{self.end_of_turn}"
112
- )
113
- else:
114
- self.merged_str_list.append(
115
- f"GPT4 Correct User: {content}{self.end_of_turn}"
116
- )
117
- self.merged_str_list.append(f"GPT4 Correct Assistant:\n")
118
- self.merged_str = "\n".join(self.merged_str_list)
119
- else:
120
- self.merged_str = "\n".join(
121
- [
122
- f'`{message["role"]}`:\n{message["content"]}\n'
123
- for message in self.messages
124
- ]
125
- )
126
 
127
  return self.merged_str
128
 
129
- def convert_pair_matches_to_messages(self, pair_matches_list):
130
- messages = []
 
 
 
 
 
 
 
131
  if len(pair_matches_list) <= 0:
132
- messages = [
133
  {
134
  "role": "user",
135
  "content": self.merged_str,
@@ -139,15 +80,17 @@ class MessageComposer:
139
  for match in pair_matches_list:
140
  inst = match.group("inst")
141
  answer = match.group("answer")
142
- messages.extend(
143
  [
144
  {"role": "user", "content": inst.strip()},
145
  {"role": "assistant", "content": answer.strip()},
146
  ]
147
  )
148
- return messages
149
 
150
- def append_last_instruction_to_messages(self, inst_matches_list, pair_matches_list):
 
 
 
151
  if len(inst_matches_list) > len(pair_matches_list):
152
  self.messages.extend(
153
  [
@@ -158,78 +101,20 @@ class MessageComposer:
158
  ]
159
  )
160
 
161
- def split(self, merged_str) -> list:
162
- self.merged_str = merged_str
163
- self.messages = []
164
-
165
- if self.model in ["mixtral-8x7b", "mistral-7b"]:
166
- pair_pattern = (
167
- r"<s>\s*\[INST\](?P<inst>[\s\S]*?)\[/INST\](?P<answer>[\s\S]*?)</s>"
168
- )
169
- pair_matches = re.finditer(pair_pattern, self.merged_str, re.MULTILINE)
170
- pair_matches_list = list(pair_matches)
171
-
172
- self.messages = self.convert_pair_matches_to_messages(pair_matches_list)
173
-
174
- inst_pattern = r"\[INST\](?P<inst>[\s\S]*?)\[/INST\]"
175
- inst_matches = re.finditer(inst_pattern, self.merged_str, re.MULTILINE)
176
- inst_matches_list = list(inst_matches)
177
-
178
- self.append_last_instruction_to_messages(
179
- inst_matches_list, pair_matches_list
180
- )
181
- elif self.model in ["nous-mixtral-8x7b"]:
182
- # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
183
- # message_pattern = r"<\|im_start\|>(?P<role>system|user|assistant)[\s\n]*(?P<content>[\s\S]*?)<\|im_end\|>"
184
- message_pattern = r"<\|im_start\|>(?P<role>system|user|assistant)[\s\n]*(?P<content>[\s\S]*?)<\|im_end\|>"
185
- message_matches = re.finditer(
186
- message_pattern, self.merged_str, flags=re.MULTILINE | re.IGNORECASE
187
- )
188
- message_matches_list = list(message_matches)
189
- logger.note(f"message_matches_list: {message_matches_list}")
190
- for match in message_matches_list:
191
- role = match.group("role")
192
- content = match.group("content")
193
- self.messages.append({"role": role, "content": content.strip()})
194
- elif self.model in ["openchat-3.5"]:
195
- pair_pattern = r"GPT4 Correct User:(?P<inst>[\s\S]*?)<\|end_of_turn\|>\s*GPT4 Correct Assistant:(?P<answer>[\s\S]*?)<\|end_of_turn\|>"
196
- pair_matches = re.finditer(
197
- pair_pattern, self.merged_str, flags=re.MULTILINE | re.IGNORECASE
198
- )
199
- pair_matches_list = list(pair_matches)
200
- self.messages = self.convert_pair_matches_to_messages(pair_matches_list)
201
- inst_pattern = r"GPT4 Correct User:(?P<inst>[\s\S]*?)<\|end_of_turn\|>"
202
- inst_matches = re.finditer(
203
- inst_pattern, self.merged_str, flags=re.MULTILINE | re.IGNORECASE
204
- )
205
- inst_matches_list = list(inst_matches)
206
- self.append_last_instruction_to_messages(
207
- inst_matches_list, pair_matches_list
208
- )
209
- else:
210
- self.messages = [
211
- {
212
- "role": "user",
213
- "content": self.merged_str,
214
- }
215
- ]
216
-
217
  return self.messages
218
 
219
 
220
  if __name__ == "__main__":
221
- model = "mixtral-8x7b"
222
- # model = "nous-mixtral-8x7b"
223
- composer = MessageComposer(model)
224
  messages = [
225
  {
226
  "role": "system",
227
- "content": "You are a LLM developed by OpenAI.\nYour name is GPT-4.",
228
  },
229
  {"role": "user", "content": "Hello, who are you?"},
230
  {"role": "assistant", "content": "I am a bot."},
231
- {"role": "user", "content": "What is your name?"},
232
- # {"role": "assistant", "content": "My name is Bing."},
233
  # {"role": "user", "content": "Tell me a joke."},
234
  # {"role": "assistant", "content": "What is a robot's favorite type of music?"},
235
  # {
@@ -237,11 +122,7 @@ if __name__ == "__main__":
237
  # "content": "How many questions have I asked? Please list them.",
238
  # },
239
  ]
240
- logger.note(f"model: {composer.model}")
241
  merged_str = composer.merge(messages)
242
- logger.note("merged_str:")
243
- logger.mesg(merged_str)
244
- logger.note("splitted messages:")
245
  pprint(composer.split(merged_str))
246
- # logger.note("merged merged_str:")
247
- # logger.mesg(composer.merge(composer.split(merged_str)))
 
1
  import re
2
  from pprint import pprint
 
3
 
4
 
5
  class MessageComposer:
6
+ """
7
+ models:
8
+ - mixtral-8x7b (mistralai/Mixtral-8x7B-Instruct-v0.1)
9
+ """
 
 
 
10
 
11
    def __init__(self, model: str = None):
        # `model` is accepted for backward compatibility with callers that
        # pass a model name; in the visible code it is not stored or used —
        # this trimmed-down composer targets only the Mistral/Mixtral
        # [INST] prompt format. TODO(review): confirm no caller relies on it.
        # Roles folded into "[INST] ... [/INST]" spans when merging.
        self.inst_roles = ["user", "system", "inst"]
        # Roles treated as model answers (close a "<s> ... </s>" pair).
        self.answer_roles = ["assistant", "bot", "answer"]
 
14
 
15
  def concat_messages_by_role(self, messages):
16
  def is_same_role(role1, role2):
 
40
  return concat_messages
41
 
42
  def merge(self, messages) -> str:
43
+ # <s> [INST] Instruction [/INST] Model answer </s> [INST] Follow-up instruction [/INST]
 
 
 
 
44
 
45
+ self.messages = self.concat_messages_by_role(messages)
 
 
 
 
 
 
 
46
  self.merged_str = ""
47
+ self.cached_str = ""
48
+ for message in self.messages:
49
+ role = message["role"]
50
+ content = message["content"]
51
+ if role in self.inst_roles:
52
+ self.cached_str = f"[INST] {content} [/INST]"
53
+ elif role in self.answer_roles:
54
+ self.merged_str += f"<s> {self.cached_str} {content} </s>\n"
55
+ self.cached_str = ""
56
+ else:
57
+ self.cached_str = f"[INST] {content} [/INST]"
58
+ if self.cached_str:
59
+ self.merged_str += f"{self.cached_str}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  return self.merged_str
62
 
63
+ def split(self, merged_str) -> list:
64
+ self.messages = []
65
+ self.merged_str = merged_str
66
+ pair_pattern = (
67
+ r"<s>\s*\[INST\](?P<inst>[\s\S]*?)\[/INST\](?P<answer>[\s\S]*?)</s>"
68
+ )
69
+ pair_matches = re.finditer(pair_pattern, self.merged_str, re.MULTILINE)
70
+ pair_matches_list = list(pair_matches)
71
+
72
  if len(pair_matches_list) <= 0:
73
+ self.messages = [
74
  {
75
  "role": "user",
76
  "content": self.merged_str,
 
80
  for match in pair_matches_list:
81
  inst = match.group("inst")
82
  answer = match.group("answer")
83
+ self.messages.extend(
84
  [
85
  {"role": "user", "content": inst.strip()},
86
  {"role": "assistant", "content": answer.strip()},
87
  ]
88
  )
 
89
 
90
+ inst_pattern = r"\[INST\](?P<inst>[\s\S]*?)\[/INST\]"
91
+ inst_matches = re.finditer(inst_pattern, self.merged_str, re.MULTILINE)
92
+ inst_matches_list = list(inst_matches)
93
+
94
  if len(inst_matches_list) > len(pair_matches_list):
95
  self.messages.extend(
96
  [
 
101
  ]
102
  )
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  return self.messages
105
 
106
 
107
  if __name__ == "__main__":
108
+ composer = MessageComposer()
 
 
109
  messages = [
110
  {
111
  "role": "system",
112
+ "content": "You are a LLM developed by OpenAI. Your name is GPT-4.",
113
  },
114
  {"role": "user", "content": "Hello, who are you?"},
115
  {"role": "assistant", "content": "I am a bot."},
116
+ # {"role": "user", "content": "What is your name?"},
117
+ {"role": "assistant", "content": "My name is Bing."},
118
  # {"role": "user", "content": "Tell me a joke."},
119
  # {"role": "assistant", "content": "What is a robot's favorite type of music?"},
120
  # {
 
122
  # "content": "How many questions have I asked? Please list them.",
123
  # },
124
  ]
 
125
  merged_str = composer.merge(messages)
126
+ print(merged_str)
 
 
127
  pprint(composer.split(merged_str))
128
+ # print(composer.merge(composer.split(merged_str)))
 
messagers/message_outputer.py CHANGED
@@ -7,22 +7,20 @@ class OpenaiStreamOutputer:
7
  * https://platform.openai.com/docs/api-reference/chat/create
8
  """
9
 
10
- def __init__(self):
11
- self.default_data = {
 
 
 
 
 
12
  "created": 1700000000,
13
  "id": "chatcmpl-hugginface",
14
  "object": "chat.completion.chunk",
15
- # "content_type": "Completions",
16
  "model": "hugginface",
17
  "choices": [],
18
  }
19
-
20
- def data_to_string(self, data={}, content_type=""):
21
- data_str = f"{json.dumps(data)}"
22
- return data_str
23
-
24
- def output(self, content=None, content_type="Completions") -> str:
25
- data = self.default_data.copy()
26
  if content_type == "Role":
27
  data["choices"] = [
28
  {
 
7
  * https://platform.openai.com/docs/api-reference/chat/create
8
  """
9
 
10
+ def data_to_string(self, data={}, content_type=""):
11
+ data_str = f"{json.dumps(data)}"
12
+
13
+ return data_str
14
+
15
+ def output(self, content=None, content_type="Completions") -> str:
16
+ data = {
17
  "created": 1700000000,
18
  "id": "chatcmpl-hugginface",
19
  "object": "chat.completion.chunk",
20
+ # "content_type": content_type,
21
  "model": "hugginface",
22
  "choices": [],
23
  }
 
 
 
 
 
 
 
24
  if content_type == "Role":
25
  data["choices"] = [
26
  {
models/__init__.py ADDED
File without changes
networks/message_streamer.py CHANGED
@@ -1,7 +1,6 @@
1
  import json
2
  import re
3
  import requests
4
- from tiktoken import get_encoding as tiktoken_get_encoding
5
  from messagers.message_outputer import OpenaiStreamOutputer
6
  from utils.logger import logger
7
  from utils.enver import enver
@@ -11,27 +10,11 @@ class MessageStreamer:
11
  MODEL_MAP = {
12
  "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1", # 72.62, fast [Recommended]
13
  "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2", # 65.71, fast
14
- "nous-mixtral-8x7b": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
15
- # "openchat-3.5": "openchat/openchat-3.5-1210", # 68.89, fast
16
- # "zephyr-7b-beta": "HuggingFaceH4/zephyr-7b-beta", # Too Slow
17
- # "llama-70b": "meta-llama/Llama-2-70b-chat-hf", # ❌ Require Pro User
18
- # "codellama-34b": "codellama/CodeLlama-34b-Instruct-hf", # ❌ Low Score
19
- # "falcon-180b": "tiiuae/falcon-180B-chat", # ❌ Require Pro User
20
  "default": "mistralai/Mixtral-8x7B-Instruct-v0.1",
21
  }
22
- STOP_SEQUENCES_MAP = {
23
- "mixtral-8x7b": "</s>",
24
- "mistral-7b": "</s>",
25
- "nous-mixtral-8x7b": "<|im_end|>",
26
- "openchat-3.5": "<|end_of_turn|>",
27
- }
28
- TOKEN_LIMIT_MAP = {
29
- "mixtral-8x7b": 32768,
30
- "mistral-7b": 32768,
31
- "nous-mixtral-8x7b": 32768,
32
- "openchat-3.5": 8192,
33
- }
34
- TOKEN_RESERVED = 100
35
 
36
  def __init__(self, model: str):
37
  if model in self.MODEL_MAP.keys():
@@ -40,32 +23,21 @@ class MessageStreamer:
40
  self.model = "default"
41
  self.model_fullname = self.MODEL_MAP[self.model]
42
  self.message_outputer = OpenaiStreamOutputer()
43
- self.tokenizer = tiktoken_get_encoding("cl100k_base")
44
 
45
  def parse_line(self, line):
46
  line = line.decode("utf-8")
47
  line = re.sub(r"data:\s*", "", line)
48
  data = json.loads(line)
49
- try:
50
- content = data["token"]["text"]
51
- except:
52
- logger.err(data)
53
  return content
54
 
55
- def count_tokens(self, text):
56
- tokens = self.tokenizer.encode(text)
57
- token_count = len(tokens)
58
- logger.note(f"Prompt Token Count: {token_count}")
59
- return token_count
60
-
61
- def chat_response(
62
  self,
63
  prompt: str = None,
64
- temperature: float = 0.5,
65
- top_p: float = 0.95,
66
- max_new_tokens: int = None,
67
- api_key: str = None,
68
- use_cache: bool = False,
69
  ):
70
  # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
71
  # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
@@ -75,138 +47,51 @@ class MessageStreamer:
75
  self.request_headers = {
76
  "Content-Type": "application/json",
77
  }
78
-
79
- if api_key:
80
- logger.note(
81
- f"Using API Key: {api_key[:3]}{(len(api_key)-7)*'*'}{api_key[-4:]}"
82
- )
83
- self.request_headers["Authorization"] = f"Bearer {api_key}"
84
-
85
- if temperature is None or temperature < 0:
86
- temperature = 0.0
87
- # temperature must 0 < and < 1 for HF LLM models
88
- temperature = max(temperature, 0.01)
89
- temperature = min(temperature, 0.99)
90
- top_p = max(top_p, 0.01)
91
- top_p = min(top_p, 0.99)
92
-
93
- token_limit = int(
94
- self.TOKEN_LIMIT_MAP[self.model]
95
- - self.TOKEN_RESERVED
96
- - self.count_tokens(prompt) * 1.35
97
- )
98
- if token_limit <= 0:
99
- raise ValueError("Prompt exceeded token limit!")
100
-
101
- if max_new_tokens is None or max_new_tokens <= 0:
102
- max_new_tokens = token_limit
103
- else:
104
- max_new_tokens = min(max_new_tokens, token_limit)
105
-
106
  # References:
107
  # huggingface_hub/inference/_client.py:
108
  # class InferenceClient > def text_generation()
109
  # huggingface_hub/inference/_text_generation.py:
110
  # class TextGenerationRequest > param `stream`
111
  # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
112
- # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task
113
  self.request_body = {
114
  "inputs": prompt,
115
  "parameters": {
116
- "temperature": temperature,
117
- "top_p": top_p,
118
  "max_new_tokens": max_new_tokens,
119
  "return_full_text": False,
120
  },
121
- "options": {
122
- "use_cache": use_cache,
123
- },
124
- "stream": True,
125
  }
126
-
127
- if self.model in self.STOP_SEQUENCES_MAP.keys():
128
- self.stop_sequences = self.STOP_SEQUENCES_MAP[self.model]
129
- # self.request_body["parameters"]["stop_sequences"] = [
130
- # self.STOP_SEQUENCES[self.model]
131
- # ]
132
-
133
  logger.back(self.request_url)
134
  enver.set_envs(proxies=True)
135
- stream_response = requests.post(
136
  self.request_url,
137
  headers=self.request_headers,
138
  json=self.request_body,
139
  proxies=enver.requests_proxies,
140
- stream=True,
141
  )
142
- status_code = stream_response.status_code
143
  if status_code == 200:
144
  logger.success(status_code)
145
  else:
146
  logger.err(status_code)
147
 
148
- return stream_response
149
-
150
- def chat_return_dict(self, stream_response):
151
- # https://platform.openai.com/docs/guides/text-generation/chat-completions-response-format
152
- final_output = self.message_outputer.default_data.copy()
153
- final_output["choices"] = [
154
- {
155
- "index": 0,
156
- "finish_reason": "stop",
157
- "message": {
158
- "role": "assistant",
159
- "content": "",
160
- },
161
- }
162
- ]
163
- logger.back(final_output)
164
-
165
- final_content = ""
166
- for line in stream_response.iter_lines():
167
  if not line:
168
  continue
169
- content = self.parse_line(line)
170
-
171
- if content.strip() == self.stop_sequences:
172
- logger.success("\n[Finished]")
173
- break
174
- else:
175
- logger.back(content, end="")
176
- final_content += content
177
-
178
- if self.model in self.STOP_SEQUENCES_MAP.keys():
179
- final_content = final_content.replace(self.stop_sequences, "")
180
-
181
- final_content = final_content.strip()
182
- final_output["choices"][0]["message"]["content"] = final_content
183
- return final_output
184
-
185
- def chat_return_generator(self, stream_response):
186
- is_finished = False
187
- line_count = 0
188
- for line in stream_response.iter_lines():
189
- if line:
190
- line_count += 1
191
- else:
192
- continue
193
 
194
  content = self.parse_line(line)
195
 
196
- if content.strip() == self.stop_sequences:
197
  content_type = "Finished"
198
  logger.success("\n[Finished]")
199
- is_finished = True
200
  else:
201
  content_type = "Completions"
202
- if line_count == 1:
203
- content = content.lstrip()
204
  logger.back(content, end="")
205
 
206
- output = self.message_outputer.output(
207
- content=content, content_type=content_type
208
- )
209
- yield output
210
-
211
- if not is_finished:
212
- yield self.message_outputer.output(content="", content_type="Finished")
 
1
  import json
2
  import re
3
  import requests
 
4
  from messagers.message_outputer import OpenaiStreamOutputer
5
  from utils.logger import logger
6
  from utils.enver import enver
 
10
  MODEL_MAP = {
11
  "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1", # 72.62, fast [Recommended]
12
  "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2", # 65.71, fast
13
+ "openchat-3.5": "openchat/openchat_3.5", # 61.24, fast
14
+ # "zephyr-7b-alpha": "HuggingFaceH4/zephyr-7b-alpha", # 59.5, fast
15
+ # "zephyr-7b-beta": "HuggingFaceH4/zephyr-7b-beta", # 61.95, slow
 
 
 
16
  "default": "mistralai/Mixtral-8x7B-Instruct-v0.1",
17
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def __init__(self, model: str):
20
  if model in self.MODEL_MAP.keys():
 
23
  self.model = "default"
24
  self.model_fullname = self.MODEL_MAP[self.model]
25
  self.message_outputer = OpenaiStreamOutputer()
 
26
 
27
  def parse_line(self, line):
28
  line = line.decode("utf-8")
29
  line = re.sub(r"data:\s*", "", line)
30
  data = json.loads(line)
31
+ content = data["token"]["text"]
 
 
 
32
  return content
33
 
34
+ def chat(
 
 
 
 
 
 
35
  self,
36
  prompt: str = None,
37
+ temperature: float = 0.01,
38
+ max_new_tokens: int = 8192,
39
+ stream: bool = True,
40
+ yield_output: bool = False,
 
41
  ):
42
  # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
43
  # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
 
47
  self.request_headers = {
48
  "Content-Type": "application/json",
49
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  # References:
51
  # huggingface_hub/inference/_client.py:
52
  # class InferenceClient > def text_generation()
53
  # huggingface_hub/inference/_text_generation.py:
54
  # class TextGenerationRequest > param `stream`
55
  # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
 
56
  self.request_body = {
57
  "inputs": prompt,
58
  "parameters": {
59
+ "temperature": max(temperature, 0.01), # must be positive
 
60
  "max_new_tokens": max_new_tokens,
61
  "return_full_text": False,
62
  },
63
+ "stream": stream,
 
 
 
64
  }
 
 
 
 
 
 
 
65
  logger.back(self.request_url)
66
  enver.set_envs(proxies=True)
67
+ stream = requests.post(
68
  self.request_url,
69
  headers=self.request_headers,
70
  json=self.request_body,
71
  proxies=enver.requests_proxies,
72
+ stream=stream,
73
  )
74
+ status_code = stream.status_code
75
  if status_code == 200:
76
  logger.success(status_code)
77
  else:
78
  logger.err(status_code)
79
 
80
+ for line in stream.iter_lines():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  if not line:
82
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  content = self.parse_line(line)
85
 
86
+ if content.strip() == "</s>":
87
  content_type = "Finished"
88
  logger.success("\n[Finished]")
 
89
  else:
90
  content_type = "Completions"
 
 
91
  logger.back(content, end="")
92
 
93
+ if yield_output:
94
+ output = self.message_outputer.output(
95
+ content=content, content_type=content_type
96
+ )
97
+ yield output
 
 
requirements.txt CHANGED
@@ -1,12 +1,17 @@
1
  aiohttp
2
  fastapi
3
  httpx
4
- markdown2[all]
5
  openai
6
  pydantic
7
  requests
8
  sse_starlette
9
  termcolor
10
- tiktoken
11
  uvicorn
12
- websockets
 
 
 
 
 
 
 
 
1
  aiohttp
2
  fastapi
3
  httpx
 
4
  openai
5
  pydantic
6
  requests
7
  sse_starlette
8
  termcolor
 
9
  uvicorn
10
+ websockets
11
+ googletrans==3.1.0a0
12
+ gTTS
13
+ torch
14
+ transformers
15
+ transformers[sentencepiece]
16
+ python-multipart
17
+ numpy
utils/enver.py CHANGED
@@ -1,8 +1,10 @@
1
  import json
2
  import os
 
3
  from pathlib import Path
4
  from utils.logger import logger
5
 
 
6
  class OSEnver:
7
  def __init__(self):
8
  self.envs_stack = []
 
1
  import json
2
  import os
3
+
4
  from pathlib import Path
5
  from utils.logger import logger
6
 
7
+
8
  class OSEnver:
9
  def __init__(self):
10
  self.envs_stack = []