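"""OpenAI-compatible chat-completions proxy.

Exposes a FastAPI endpoint at /hf/v1/chat/completions that forwards
conversations to the chatpro.ai-pro.org openAI endpoint and reshapes the
reply into the OpenAI chat-completion (and streaming chunk) formats.
"""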
import os
import time
import random
import json
import asyncio
import requests
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import List, Optional

app = FastAPI()

class ChatCompletionMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatCompletionMessage]
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False


class ChatCompletionResponse(BaseModel):
    id: str
    object: str
    created: int
    model: str
    choices: List[dict]
    usage: dict
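
# The Pydantic models above mirror OpenAI's /v1/chat/completions schema,
# so off-the-shelf OpenAI clients can point at this proxy unchanged.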

def generate_random_ip():
    # Random IPv4 for the X-Forwarded-For header, to vary the apparent
    # client address on each upstream request.
    return f"{random.randint(1, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"

async def fetch_response(messages: List[ChatCompletionMessage], model: str):
    your_api_url = "https://chatpro.ai-pro.org/api/ask/openAI"
    headers = {
        "content-type": "application/json",
        "X-Forwarded-For": generate_random_ip(),
        "origin": "https://chatpro.ai-pro.org",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    # Flatten the chat history into a single prompt string.
    conversation = "\n".join(f"{msg.role}: {msg.content}" for msg in messages)
    # Upstream instruction (Chinese): "Please focus on and reply to the
    # user's most recent message; avoid answers that summarize the
    # conversation history."
    conversation += "\n请关注并回复user最近的消息并避免总结对话历史的回答"
    data = {
        "text": conversation,
        "endpoint": "openAI",
        "model": model
    }
    # requests is blocking, so run the call in a worker thread to keep
    # the event loop responsive.
    response = await asyncio.to_thread(requests.post, your_api_url, headers=headers, json=data)
    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Error from upstream API")
    # The upstream reply is expected to be JSON with a "response" field.
    return response.json()

async def stream_response(content: str):
    # The whole reply arrives at once from upstream, so it is emitted as
    # a single SSE chunk followed by a terminating chunk. (Building the
    # dicts before json.dumps also avoids the multi-line-f-string syntax
    # error this code hits on Python versions before 3.12.)
    chat_id = f"chatcmpl-{os.urandom(12).hex()}"
    # Opening chunk carrying the full content.
    first_chunk = {
        'id': chat_id,
        'object': 'chat.completion.chunk',
        'created': int(time.time()),
        'model': 'gpt-3.5-turbo-0613',
        'choices': [{
            'index': 0,
            'delta': {'content': content},
            'finish_reason': None
        }]
    }
    yield f"data: {json.dumps(first_chunk)}\n\n"
    # Closing chunk with an empty delta and a stop reason.
    final_chunk = {
        'id': chat_id,
        'object': 'chat.completion.chunk',
        'created': int(time.time()),
        'model': 'gpt-3.5-turbo-0613',
        'choices': [{
            'index': 0,
            'delta': {},
            'finish_reason': 'stop'
        }]
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
    yield "data: [DONE]\n\n"
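
# Wire format produced above, for reference: each event is one line,
#   data: {...chat.completion.chunk JSON...}
# followed by a blank line, and the stream ends with "data: [DONE]".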

@app.post("/hf/v1/chat/completions")
async def chat_completions(request: Request):
    body = await request.json()
    chat_request = ChatCompletionRequest(**body)
    api_response = await fetch_response(chat_request.messages, chat_request.model)
    content = api_response.get("response", "")
    if chat_request.stream:
        return StreamingResponse(stream_response(content), media_type="text/event-stream")
    else:
        # Character counts stand in for token counts here; they are a
        # rough approximation, not real tokenizer output.
        prompt_chars = sum(len(msg.content) for msg in chat_request.messages)
        openai_response = ChatCompletionResponse(
            id="chatcmpl-" + os.urandom(12).hex(),
            object="chat.completion",
            created=int(time.time()),
            model=chat_request.model,
            choices=[
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": content
                    },
                    "finish_reason": "stop"
                }
            ],
            usage={
                "prompt_tokens": prompt_chars,
                "completion_tokens": len(content),
                "total_tokens": prompt_chars + len(content)
            }
        )
        return openai_response
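
# Example request (a sketch; assumes the server is listening on port 7860,
# the Hugging Face Spaces default):
#
#   curl http://localhost:7860/hf/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hello"}]}'

if __name__ == "__main__":
    # Local entry point (a sketch; assumes uvicorn is installed).
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)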