| | import os |
| | from flask import Flask, request, Response, jsonify |
| | import requests |
| | import json |
| | import random |
| | from helper import create_jwt, github_username_zed_userid_list |
| |
|
# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
| |
|
@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat():
    """Proxy an OpenAI-style chat completion request to the Zed LLM endpoint.

    The JSON request body is wrapped in the envelope posted to
    ``https://llm.zed.dev/completion?`` and authenticated with a JWT created
    for a randomly chosen entry of ``github_username_zed_userid_list``.

    Returns:
        When the request's ``stream`` field is truthy, a ``text/event-stream``
        response made of ``data: {...}`` events and terminated by
        ``data: [DONE]``. Otherwise a single ``chat.completion`` JSON object.
        A 400 JSON error is returned when the body is not a JSON object or
        has no ``messages`` key, and a 502 JSON error when the upstream reply
        of a non-streaming call is not JSON.
    """
    # Imported locally because the module-level imports do not provide
    # ``time``, which the non-streaming response needs for ``created``.
    import time

    # silent=True yields None instead of raising on a missing/invalid body,
    # so malformed requests get an explicit 400 rather than a 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'messages' not in payload:
        return jsonify({
            "error": {
                "message": "Request body must be a JSON object with a 'messages' field",
                "type": "invalid_request_error"
            }
        }), 400

    model = payload.get('model', 'claude-3-5-sonnet-20241022')

    # Resolved once so the upstream request and the response handling agree.
    # Previously the upstream call defaulted to stream=True while the handler
    # defaulted to treating the reply as one JSON document.
    stream = bool(payload.get('stream', False))

    url = "https://llm.zed.dev/completion?"

    llm_payload = {
        "provider": "anthropic",
        "model": model,
        "provider_request": {
            "model": model,
            "max_tokens": payload.get('max_tokens', 8192),
            "temperature": payload.get('temperature', 0),
            "top_p": payload.get('top_p', 0.7),
            "messages": payload['messages'],
            "stream": stream,
            "system": payload.get('system', "")
        }
    }

    github_username, zed_user_id = random.choice(github_username_zed_userid_list)
    token = create_jwt(github_username, zed_user_id)

    headers = {
        'Host': 'llm.zed.dev',
        'accept': '*/*',
        'content-type': 'application/json',
        'authorization': f'Bearer {token}',
        'user-agent': 'Zed/0.149.3 (macos; aarch64)'
    }

    # Route the outgoing call through HTTP_PROXY when that variable is set.
    proxy = os.environ.get('HTTP_PROXY', None)
    proxies = {'http': proxy, 'https': proxy} if proxy else None

    def generate():
        """Yield server-sent events translated from the upstream stream."""
        with requests.post(url, headers=headers, json=llm_payload,
                           stream=True, proxies=proxies) as response:
            # iter_lines() reassembles lines across network chunks, so a JSON
            # document is never parsed half-received and a chunk carrying
            # several documents is not discarded.
            # NOTE(review): assumes the upstream emits one JSON document per
            # line -- confirm against the upstream API.
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except ValueError:
                    # Covers both invalid JSON and undecodable bytes; such
                    # lines are skipped, as unparsable chunks were before.
                    continue
                if not isinstance(data, dict):
                    continue
                content = data.get('completion', '')
                event = {'choices': [{'delta': {'content': content}}]}
                yield f"data: {json.dumps(event)}\n\n"
        yield "data: [DONE]\n\n"

    if stream:
        return Response(generate(), content_type='text/event-stream')

    with requests.post(url, headers=headers, json=llm_payload,
                       proxies=proxies) as response:
        try:
            data = response.json()
        except ValueError:
            return jsonify({
                "error": {
                    "message": "Upstream returned a non-JSON response",
                    "type": "upstream_error"
                }
            }), 502

    completion = data.get('completion', '') if isinstance(data, dict) else ''
    return jsonify({
        "id": "chatcmpl-" + os.urandom(12).hex(),
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": completion
            },
            "finish_reason": "stop"
        }],
        # Token counts are not computed by this proxy; -1 is a placeholder.
        "usage": {
            "prompt_tokens": -1,
            "completion_tokens": -1,
            "total_tokens": -1
        }
    })
| |
|
# Start Flask's built-in server only when this file is executed as a script.
if __name__ == '__main__':
    # NOTE(review): debug=True turns on Flask's debug mode (interactive
    # debugger and reloader). Confirm this entry point is never used for a
    # deployment reachable by untrusted clients.
    app.run(debug=True)