tastypear committed
Commit 5e4853a · verified · 1 Parent(s): 29d8470

Update main.py

Files changed (1)
main.py +68 -68
main.py CHANGED
@@ -1,69 +1,69 @@
- import random
- import requests
- from flask import Flask, request, jsonify, Response, stream_with_context, render_template_string
- from mistral_common.protocol.instruct.messages import AssistantMessage, UserMessage, SystemMessage
- from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
- from mistral_common.protocol.instruct.request import ChatCompletionRequest
- mt_v3 = MistralTokenizer.v3(is_tekken=True)
-
- def calc_messages_tokens(json_data):
-     messages = json_data["messages"]
-     m_messages = []
-     for message in messages:
-         if message["role"] == "system":
-             m_messages.append(SystemMessage(content=message["content"]))
-         elif message["role"] == "user":
-             m_messages.append(UserMessage(content=message["content"]))
-         elif message["role"] == "assistant":
-             m_messages.append(AssistantMessage(content=message["content"]))
-         else:
-             continue
-     tokens = mt_v3.encode_chat_completion(ChatCompletionRequest(messages=m_messages)).tokens
-     return len(tokens) + len(m_messages)
-
- app = Flask(__name__)
-
- @app.route('/', methods=['GET'])
- def index():
-     template = '''
-     <html>
-     <head>
-         <title>Mistral-Nemo Chat API</title>
-     </head>
-     <body>
-         <h1>Mistral-Nemo OpenAI Compatible API</h1>
-         <li>1. Create your key <a href="https://huggingface.co/settings/tokens/new">[here]</a> by selecting "serverless Inference API".</li>
-         <li>2. Set `https://tastypear-mistral-nemo-chat.hf.space/api" as the domain in the client configuration.</li>
-         If you have multiple keys, you can concatenate them with a semicolon (`;`) to use them randomly, e.g., `hf_aaaa;hf_bbbb;hf_...`
-     </body>
-     </html>
-     '''
-     return render_template_string(template)
-
- @app.route('/api/v1/chat/completions', methods=['POST'])
- def proxy():
-     headers = dict(request.headers)
-     headers.pop('Host', None)
-     headers.pop('Content-Length', None)
-     keys = request.headers['Authorization'].split(' ')[1].split(';')
-     headers['Authorization'] = f'Bearer {random.choice(keys)}'
-
-     json_data = request.get_json()
-
-     # Avoid using cache
-     json_data["messages"][-1]['content'] = ' '*random.randint(1, 20)+json_data["messages"][-1]['content']
-
-     # Use the largest ctx
-     json_data['max_tokens'] = 32768 - calc_messages_tokens(json_data)
-
-     json_data['json_mode'] = False
-     model = json_data['model']
-
-     def generate():
-         model = 'mistralai/Mistral-Nemo-Instruct-2407'
-         with requests.post(f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions", json=request.json, headers=headers, stream=True) as resp:
-             for chunk in resp.iter_content(chunk_size=1024):
-                 if chunk:
-                     yield chunk
-
+ import random
+ import requests
+ from flask import Flask, request, jsonify, Response, stream_with_context, render_template_string
+ from mistral_common.protocol.instruct.messages import AssistantMessage, UserMessage, SystemMessage
+ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
+ from mistral_common.protocol.instruct.request import ChatCompletionRequest
+ mt_v3 = MistralTokenizer.v3(is_tekken=True)
+
+ def calc_messages_tokens(json_data):
+     messages = json_data["messages"]
+     m_messages = []
+     for message in messages:
+         if message["role"] == "system":
+             m_messages.append(SystemMessage(content=message["content"]))
+         elif message["role"] == "user":
+             m_messages.append(UserMessage(content=message["content"]))
+         elif message["role"] == "assistant":
+             m_messages.append(AssistantMessage(content=message["content"]))
+         else:
+             continue
+     tokens = mt_v3.encode_chat_completion(ChatCompletionRequest(messages=m_messages)).tokens
+     return len(tokens) + len(m_messages)
+
+ app = Flask(__name__)
+
+ @app.route('/', methods=['GET'])
+ def index():
+     template = '''
+     <html>
+     <head>
+         <title>Mistral-Nemo Chat API</title>
+     </head>
+     <body>
+         <h1>Mistral-Nemo OpenAI Compatible API</h1>
+         <li>1. Create your key <a href="https://huggingface.co/settings/tokens/new">[here]</a> with "serverless Inference API" permission selected.</li>
+         <li>2. Set "https://tastypear-mistral-nemo-chat.hf.space/api" as the domain in the client configuration.</li>
+         If you have multiple keys, you can concatenate them with a semicolon (`;`) to use them randomly, e.g., `hf_aaaa;hf_bbbb;hf_...`
+     </body>
+     </html>
+     '''
+     return render_template_string(template)
+
+ @app.route('/api/v1/chat/completions', methods=['POST'])
+ def proxy():
+     headers = dict(request.headers)
+     headers.pop('Host', None)
+     headers.pop('Content-Length', None)
+     keys = request.headers['Authorization'].split(' ')[1].split(';')
+     headers['Authorization'] = f'Bearer {random.choice(keys)}'
+
+     json_data = request.get_json()
+
+     # Avoid using cache
+     json_data["messages"][-1]['content'] = ' '*random.randint(1, 20)+json_data["messages"][-1]['content']
+
+     # Use the largest ctx
+     json_data['max_tokens'] = 32768 - calc_messages_tokens(json_data)
+
+     json_data['json_mode'] = False
+     model = json_data['model']
+
+     def generate():
+         model = 'mistralai/Mistral-Nemo-Instruct-2407'
+         with requests.post(f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions", json=request.json, headers=headers, stream=True) as resp:
+             for chunk in resp.iter_content(chunk_size=1024):
+                 if chunk:
+                     yield chunk
+
      return Response(stream_with_context(generate()), content_type='text/event-stream')
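
For context, a client uses this proxy like any other OpenAI-compatible endpoint. The sketch below is a minimal, untested example using the `openai` Python package (v1+); the base URL follows from the `/api/v1/chat/completions` route above, and `hf_aaaa`/`hf_bbbb` are placeholder keys, not real tokens:

    # Hypothetical client for the proxy above; not part of the commit.
    from openai import OpenAI

    client = OpenAI(
        # The proxy serves /api/v1/chat/completions, so /api/v1 is the base URL.
        base_url="https://tastypear-mistral-nemo-chat.hf.space/api/v1",
        # Keys joined with ';' are split server-side and one is chosen at random.
        api_key="hf_aaaa;hf_bbbb",
    )

    stream = client.chat.completions.create(
        model="mistralai/Mistral-Nemo-Instruct-2407",
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True,  # the proxy always responds as a text/event-stream
    )
    for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")

Note that `generate()` pins the upstream model to `mistralai/Mistral-Nemo-Instruct-2407`, so whatever `model` the client sends is effectively ignored.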