# Hugging Face Spaces page header captured with this file: "Spaces: Running".
import asyncio
import json
import logging
import os
import random
import time
from collections import deque
from datetime import datetime
from typing import Any, Deque, Dict, List, Optional, Set, Tuple

import httpx
import uvicorn
from fastapi import FastAPI, HTTPException, Request, Response, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
# Logging service module
class LoggingService:
    """Thin wrapper around a named ``logging.Logger`` that writes to the console.

    The underlying logger is obtained with ``logging.getLogger(service_name)``,
    so every ``LoggingService`` created with the same name shares one logger.
    """

    def __init__(self, service_name: str = "ProxyServer"):
        """Configure the named logger at DEBUG level with a console handler.

        Args:
            service_name: Name passed to ``logging.getLogger``; it also appears
                in strings built by ``_format_message``.
        """
        self.service_name = service_name
        self.logger = logging.getLogger(service_name)
        self.logger.setLevel(logging.DEBUG)

        # getLogger() returns the same object for the same name, so attaching
        # a handler unconditionally would emit every record once per
        # LoggingService instance. Only attach when none is configured yet.
        if not self.logger.handlers:
            formatter = logging.Formatter(
                "[%(levelname)s] %(asctime)s [%(name)s] - %(message)s"
            )
            console_handler = logging.StreamHandler()
            console_handler.setFormatter(formatter)
            self.logger.addHandler(console_handler)

    def _format_message(self, level: str, message: str) -> str:
        """Return ``message`` prefixed with level, ISO timestamp and service name.

        Not used by the logging methods below (the handler's formatter does
        that job); kept for callers that want the formatted string directly.
        """
        timestamp = datetime.now().isoformat()
        return f"[{level}] {timestamp} [{self.service_name}] - {message}"

    def info(self, message: str):
        """Log ``message`` at INFO level."""
        self.logger.info(message)

    def error(self, message: str):
        """Log ``message`` at ERROR level."""
        self.logger.error(message)

    def warn(self, message: str):
        """Log ``message`` at WARNING level."""
        self.logger.warning(message)

    def debug(self, message: str):
        """Log ``message`` at DEBUG level."""
        self.logger.debug(message)
# Message queue implementation
class MessageQueue:
    """Asyncio FIFO queue whose ``dequeue`` waits with a timeout.

    Messages are buffered when nobody is waiting; otherwise they are handed
    straight to the oldest waiting ``dequeue`` call. Failures are signalled
    with plain ``Exception`` objects whose messages are ``"Queue timeout"``,
    ``"Queue closed"`` and ``"Queue is closed"``.
    """

    def __init__(self, timeout_ms: int = 600000):
        """Create an empty, open queue.

        Args:
            timeout_ms: Default wait, in milliseconds, used by ``dequeue``
                when no explicit timeout is given.
        """
        # deque gives O(1) pops from the left; list.pop(0) is O(n).
        self.messages: Deque[dict] = deque()
        self.waiting_resolvers: Deque[Tuple[asyncio.Future, asyncio.TimerHandle]] = deque()
        self.default_timeout = timeout_ms / 1000  # convert to seconds
        self.closed = False

    def enqueue(self, message: dict):
        """Deliver ``message`` to the oldest live waiter, or buffer it.

        Does nothing once the queue has been closed.
        """
        if self.closed:
            return
        while self.waiting_resolvers:
            future, timer = self.waiting_resolvers.popleft()
            timer.cancel()
            # A waiter whose future is already done (cancelled or timed out)
            # can still be registered until its task runs its cleanup; skip
            # it instead of dropping the message on the floor.
            if not future.done():
                future.set_result(message)
                return
        self.messages.append(message)

    async def dequeue(self, timeout_ms: Optional[int] = None) -> dict:
        """Return the next message, waiting for one if the buffer is empty.

        Args:
            timeout_ms: Maximum wait in milliseconds; ``None`` uses the
                default given to the constructor.

        Raises:
            Exception: ``"Queue is closed"`` if the queue was already closed,
                ``"Queue timeout"`` if no message arrived in time, or
                ``"Queue closed"`` if the queue was closed while waiting.
        """
        if self.closed:
            raise Exception("Queue is closed")
        if self.messages:
            return self.messages.popleft()

        timeout = self.default_timeout if timeout_ms is None else timeout_ms / 1000
        loop = asyncio.get_running_loop()
        future = loop.create_future()

        def timeout_callback():
            if not future.done():
                future.set_exception(Exception("Queue timeout"))

        timer = loop.call_later(timeout, timeout_callback)
        entry = (future, timer)
        self.waiting_resolvers.append(entry)
        try:
            return await future
        finally:
            timer.cancel()
            try:
                self.waiting_resolvers.remove(entry)
            except ValueError:
                # Already popped by enqueue() or cleared by close().
                pass

    def close(self):
        """Close the queue, failing every waiter and discarding buffered messages."""
        self.closed = True
        for future, timer in self.waiting_resolvers:
            timer.cancel()
            if not future.done():
                future.set_exception(Exception("Queue closed"))
        self.waiting_resolvers.clear()
        self.messages.clear()
# WebSocket connection manager
class ConnectionRegistry:
    """Track main and per-request WebSocket connections and their message queues.

    Main connections are kept in a set; each request-specific connection and
    each message queue is stored under its request ID.
    """

    def __init__(self, logger: LoggingService):
        self.logger = logger
        self.main_connections: Set[WebSocket] = set()  # set of main connections
        self.request_connections: Dict[str, WebSocket] = {}  # request ID -> dedicated connection
        self.message_queues: Dict[str, MessageQueue] = {}
        self._connection_added_callbacks = []
        self._connection_removed_callbacks = []

    def on_connection_added(self, callback):
        """Register ``callback(websocket)`` to run when a main connection is added."""
        self._connection_added_callbacks.append(callback)

    def on_connection_removed(self, callback):
        """Register ``callback(websocket)`` to run when a main connection is removed."""
        self._connection_removed_callbacks.append(callback)

    async def add_main_connection(self, websocket: WebSocket, client_info: dict):
        """Accept ``websocket``, store it as a main connection and notify listeners."""
        await websocket.accept()
        self.main_connections.add(websocket)
        self.logger.info(f"新主连接: {client_info.get('address')}")
        for listener in self._connection_added_callbacks:
            listener(websocket)

    async def add_request_connection(self, websocket: WebSocket, request_id: str, client_info: dict):
        """Accept ``websocket`` and store it as the dedicated connection for ``request_id``."""
        await websocket.accept()
        self.request_connections[request_id] = websocket
        self.logger.info(f"新请求连接 [ID: {request_id}]: {client_info.get('address')}")

    async def remove_main_connection(self, websocket: WebSocket):
        """Forget a main connection and notify listeners; no-op if it is not registered."""
        if websocket not in self.main_connections:
            return
        self.main_connections.remove(websocket)
        self.logger.info("主连接断开")
        for listener in self._connection_removed_callbacks:
            listener(websocket)

    async def remove_request_connection(self, websocket: WebSocket, request_id: str):
        """Forget the dedicated connection for ``request_id`` and close its queue.

        Nothing happens unless ``websocket`` is the connection currently
        registered under ``request_id``.
        """
        registered = request_id in self.request_connections
        if not registered or self.request_connections[request_id] != websocket:
            return
        del self.request_connections[request_id]
        self.logger.info(f"请求连接断开 [ID: {request_id}]")
        # Close the message queue that belongs to this request.
        self.remove_message_queue(request_id)

    async def handle_main_message(self, message_data: str):
        """Parse a message received on a main connection and log its request ID.

        Main connections only carry initial requests here; no response
        routing is performed. Any failure is logged, never raised.
        """
        try:
            payload = json.loads(message_data)
            request_id = payload.get("request_id")
            if request_id:
                self.logger.info(f"收到主连接请求 [ID: {request_id}]")
            else:
                self.logger.warn("收到无效消息:缺少request_id")
        except Exception as error:
            self.logger.error(f"解析主连接WebSocket消息失败: {str(error)}")

    async def handle_request_message(self, message_data: str, request_id: str):
        """Parse a message from a dedicated connection and route it to its queue.

        Messages with a missing or mismatching ``request_id``, or for which
        no queue exists, are logged and dropped. Any failure is logged,
        never raised.
        """
        try:
            payload = json.loads(message_data)
            claimed_id = payload.get("request_id")
            if not claimed_id:
                self.logger.warn("收到无效消息:缺少request_id")
                return
            if claimed_id != request_id:
                self.logger.warn(f"请求ID不匹配: 预期 {request_id}, 实际 {claimed_id}")
                return
            target_queue = self.message_queues.get(request_id)
            if not target_queue:
                self.logger.warn(f"收到未知请求ID的消息: {request_id}")
                return
            await self._route_message(payload, target_queue)
        except Exception as error:
            self.logger.error(f"解析请求连接WebSocket消息失败: {str(error)}")

    async def _route_message(self, message: dict, queue: MessageQueue):
        """Enqueue ``message`` according to its ``event_type``.

        ``stream_close`` is translated into a ``{"type": "STREAM_END"}``
        marker; header, chunk and error events are enqueued unchanged.
        """
        kind = message.get("event_type")
        if kind == "stream_close":
            queue.enqueue({"type": "STREAM_END"})
        elif kind in ("response_headers", "chunk", "error"):
            queue.enqueue(message)
        else:
            self.logger.warn(f"未知的事件类型: {kind}")

    def has_active_main_connections(self) -> bool:
        """Return True when at least one main connection is registered."""
        return bool(self.main_connections)

    def get_random_main_connection(self) -> Optional[WebSocket]:
        """Return a randomly chosen main connection, or None when there are none."""
        candidates = list(self.main_connections)
        if not candidates:
            return None
        total = len(candidates)
        chosen = random.randint(0, total - 1)
        self.logger.info(f"随机选择主连接 {chosen + 1}/{total}")
        return candidates[chosen]

    def get_request_connection(self, request_id: str) -> Optional[WebSocket]:
        """Return the dedicated connection for ``request_id``, or None."""
        return self.request_connections.get(request_id)

    def create_message_queue(self, request_id: str) -> MessageQueue:
        """Create, register and return a new message queue for ``request_id``."""
        new_queue = MessageQueue()
        self.message_queues[request_id] = new_queue
        return new_queue

    def remove_message_queue(self, request_id: str):
        """Close and unregister the message queue for ``request_id``, if any."""
        existing = self.message_queues.get(request_id)
        if not existing:
            return
        existing.close()
        del self.message_queues[request_id]
# Request handler
class RequestHandler:
    """Relay an incoming HTTP request to a connected client over WebSockets.

    For each request the handler notifies a randomly chosen main connection,
    waits for the client to open a dedicated connection for the request ID,
    forwards the request over it and streams the queued response back.
    """

    def __init__(self, connection_registry: ConnectionRegistry, logger: LoggingService):
        self.connection_registry = connection_registry
        self.logger = logger

    async def process_request(self, request: Request) -> StreamingResponse:
        """Proxy ``request`` and return the streamed response.

        Raises:
            HTTPException: 503 when no main connection is registered; 504 on
                a queue timeout or when the dedicated connection is not
                established in time; the status reported by the remote side
                when it answers with an error event; 500 for anything else.
        """
        self.logger.info(f"处理请求: {request.method} {request.url.path}")
        if not self.connection_registry.has_active_main_connections():
            raise HTTPException(status_code=503, detail="没有可用的浏览器连接")

        request_id = self._generate_request_id()
        proxy_request = await self._build_proxy_request(request, request_id)
        message_queue = self.connection_registry.create_message_queue(request_id)
        try:
            # Announce the request on a main connection.
            await self._notify_main_connection(proxy_request, request_id)
            # Wait for the dedicated connection to be established.
            await self._wait_for_request_connection(request_id)
            # Forward the request over the dedicated connection.
            await self._forward_request(proxy_request, request_id)
            return await self._handle_response(request, message_queue, request_id)
        except HTTPException:
            # Already carries the intended status (e.g. the upstream error
            # status); it must not be rewritten into a generic 500 below.
            await self._cleanup_request(request_id)
            raise
        except Exception as error:
            # On success the stream generator cleans up; here we must do it.
            await self._cleanup_request(request_id)
            reason = str(error)
            if reason == "Queue timeout":
                raise HTTPException(status_code=504, detail="请求超时") from error
            if reason == "请求连接建立超时":
                raise HTTPException(status_code=504, detail="请求连接建立超时") from error
            # For HTTP 400 INVALID_ARGUMENT failures, dump the full request.
            if "HTTP 400" in reason and "INVALID_ARGUMENT" in reason:
                self.logger.error(f"[RequestProcessor] 请求执行失败: {reason}")
                self._log_full_request(proxy_request, request_id)
            else:
                self.logger.error(f"请求处理错误: {reason}")
            raise HTTPException(status_code=500, detail=f"代理错误: {reason}") from error

    def _generate_request_id(self) -> str:
        """Return an ID made of the millisecond timestamp and 32 random bits in hex."""
        return f"{int(time.time() * 1000)}_{random.getrandbits(32):08x}"

    async def _build_proxy_request(self, request: Request, request_id: str) -> dict:
        """Build the JSON-serialisable description of ``request`` sent to the client."""
        body = ""
        body_data = await request.body()
        if body_data:
            try:
                body = body_data.decode('utf-8')
            except UnicodeDecodeError:
                # NOTE(review): this forwards the bytes repr ("b'...'"), not
                # the raw payload; confirm the client expects that.
                body = str(body_data)
        return {
            "path": request.url.path,
            "method": request.method,
            "headers": dict(request.headers),
            "query_params": dict(request.query_params),
            "body": body,
            "request_id": request_id,
        }

    def _log_full_request(self, proxy_request: dict, request_id: str):
        """Log every field of ``proxy_request`` at ERROR level for diagnosis."""
        self.logger.error(f"完整请求信息:")
        self.logger.error(f" 请求ID: {request_id}")
        self.logger.error(f" 方法: {proxy_request['method']}")
        self.logger.error(f" 路径: {proxy_request['path']}")
        self.logger.error(f" 请求头: {json.dumps(proxy_request['headers'], indent=2, ensure_ascii=False)}")
        self.logger.error(f" 查询参数: {json.dumps(proxy_request['query_params'], indent=2, ensure_ascii=False)}")
        self.logger.error(f" 请求体: {proxy_request['body']}")

    async def _cleanup_request(self, request_id: str):
        """Close the message queue and the dedicated connection of ``request_id``.

        Failures while closing the connection are logged, not raised.
        """
        self.connection_registry.remove_message_queue(request_id)
        connection = self.connection_registry.get_request_connection(request_id)
        if connection:
            try:
                await connection.close()
                self.logger.debug(f"请求连接已关闭 [ID: {request_id}]")
            except Exception as e:
                self.logger.error(f"关闭请求连接失败 [ID: {request_id}]: {str(e)}")

    async def _notify_main_connection(self, proxy_request: dict, request_id: str):
        """Send the full request description to a randomly chosen main connection.

        Raises:
            Exception: when no main connection is available.
        """
        connection = self.connection_registry.get_random_main_connection()
        if not connection:
            raise Exception("没有可用的主连接")
        await connection.send_text(json.dumps(proxy_request))
        self.logger.info(f"已通知主连接新请求 [ID: {request_id}]")

    async def _wait_for_request_connection(self, request_id: str, timeout: int = 30):
        """Poll every 0.1 s until the dedicated connection exists.

        Raises:
            Exception: ``"请求连接建立超时"`` after ``timeout`` seconds.
        """
        # Monotonic clock: the wait must not be affected by wall-clock jumps.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if self.connection_registry.get_request_connection(request_id):
                self.logger.info(f"请求连接已建立 [ID: {request_id}]")
                return
            await asyncio.sleep(0.1)
        self.logger.error(f"请求连接建立超时 [ID: {request_id}],已等待 {timeout} 秒")
        raise Exception("请求连接建立超时")

    async def _forward_request(self, proxy_request: dict, request_id: str):
        """Send the request description over the dedicated connection.

        Raises:
            Exception: when no dedicated connection is registered for the ID.
        """
        connection = self.connection_registry.get_request_connection(request_id)
        if not connection:
            raise Exception(f"请求连接不存在 [ID: {request_id}]")
        await connection.send_text(json.dumps(proxy_request))
        self.logger.info(f"请求已转发到专用连接 [ID: {request_id}]")

    async def _handle_response(self, request: Request, message_queue: MessageQueue, request_id: str) -> StreamingResponse:
        """Wait for the first queued message and turn it into a streaming response.

        Raises:
            HTTPException: 504 when the first message times out, 500 for
                other queue failures, or the status carried by an ``error``
                event (500 when the event has none).
        """
        # Wait for the response headers.
        try:
            header_message = await message_queue.dequeue()
        except Exception as e:
            if str(e) == "Queue timeout":
                raise HTTPException(status_code=504, detail="请求超时") from e
            raise HTTPException(status_code=500, detail=f"获取响应头失败: {str(e)}") from e

        if header_message.get("event_type") == "error":
            error_status = header_message.get("status", 500)
            error_message = header_message.get("message", "未知错误")
            # For HTTP 400 INVALID_ARGUMENT failures, dump the full request.
            if error_status == 400 and "INVALID_ARGUMENT" in error_message:
                self.logger.error(f"[RequestProcessor] 请求执行失败: HTTP {error_status}: {error_message}")
                # Rebuild the request description from the original request.
                proxy_request = await self._build_proxy_request(request, request_id)
                self._log_full_request(proxy_request, request_id)
            raise HTTPException(status_code=error_status, detail=error_message)

        # A null "headers" value would otherwise break the generator below.
        headers = header_message.get("headers") or {}
        status_code = header_message.get("status", 200)
        return StreamingResponse(
            self._stream_response_generator(message_queue, headers, request_id),
            status_code=status_code,
            headers=headers,
        )

    @staticmethod
    def _header_value(headers: dict, name: str) -> str:
        """Return the value of header ``name`` ignoring case, or ``""``."""
        wanted = name.lower()
        for key, value in headers.items():
            if str(key).lower() == wanted:
                return str(value)
        return ""

    async def _stream_response_generator(self, message_queue: MessageQueue, headers: dict, request_id: str):
        """Yield response body chunks from ``message_queue`` until the stream ends.

        The stream stops on a ``STREAM_END`` marker, an ``error`` event, a
        closed queue, or a queue timeout on a non-SSE response. For
        ``text/event-stream`` responses a timeout emits a keepalive comment
        and waiting continues. The queue and the dedicated connection are
        always cleaned up at the end.
        """
        # HTTP header names are case-insensitive, so do not rely on the
        # exact "Content-Type" spelling sent by the client.
        is_event_stream = "text/event-stream" in self._header_value(headers, "Content-Type").lower()
        try:
            while True:
                try:
                    data_message = await message_queue.dequeue()
                    if data_message.get("type") == "STREAM_END":
                        self.logger.debug(f"收到流结束信号 [ID: {request_id}]")
                        break
                    if data_message.get("event_type") == "error":
                        self.logger.error(f"收到错误信号 [ID: {request_id}]: {data_message.get('message', '未知错误')}")
                        break
                    if data := data_message.get("data"):
                        if isinstance(data, str):
                            yield data.encode('utf-8')
                        else:
                            yield data
                except Exception as error:
                    reason = str(error)
                    if reason == "Queue timeout":
                        if is_event_stream:
                            yield b": keepalive\n\n"
                        else:
                            self.logger.debug(f"队列超时,结束流式响应 [ID: {request_id}]")
                            break
                    elif reason in ["Queue closed", "Queue is closed"]:
                        self.logger.info(f"队列已关闭,结束流式响应 [ID: {request_id}]")
                        break
                    else:
                        self.logger.error(f"流式响应处理错误 [ID: {request_id}]: {reason}")
                        raise
        except Exception as e:
            self.logger.error(f"流式响应生成错误 [ID: {request_id}]: {str(e)}")
        finally:
            # Release the queue and the dedicated connection once streaming ends.
            self.logger.debug(f"流式响应结束,开始清理资源 [ID: {request_id}]")
            await self._cleanup_request(request_id)
# Main server class
class ProxyServerSystem:
    """Wire the logger, connection registry, request handler and FastAPI app together.

    HTTP and WebSocket traffic share one port, read from the ``PORT``
    environment variable (default 7860); the bind host comes from ``HOST``
    (default ``0.0.0.0``). Values in ``config`` override these defaults.
    """

    # HTTP methods accepted by the proxying routes.
    _PROXY_METHODS = ["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS", "HEAD"]

    def __init__(self, config: Optional[dict] = None):
        """Build the server components and register the routes.

        Args:
            config: Optional overrides for the ``http_port``, ``ws_port``
                and ``host`` settings.
        """
        if config is None:
            config = {}
        # Hugging Face Spaces provides the port through the PORT variable.
        port = int(os.environ.get("PORT", 7860))
        host = os.environ.get("HOST", "0.0.0.0")
        self.config = {
            "http_port": port,
            "ws_port": port,  # same port as HTTP
            "host": host,
            **config,
        }
        self.logger = LoggingService("ProxyServer")
        self.connection_registry = ConnectionRegistry(self.logger)
        self.request_handler = RequestHandler(self.connection_registry, self.logger)
        # Initialise callback lists before anything else can touch them.
        self._started_callbacks = []
        self._error_callbacks = []
        self.app = FastAPI(
            title="WebSocket Proxy Server",
            description="A proxy server with WebSocket support for Hugging Face Spaces",
            version="1.0.0",
        )
        self._setup_routes()

    def on_started(self, callback):
        """Register ``callback()`` to run just before the server starts serving."""
        self._started_callbacks.append(callback)

    def on_error(self, callback):
        """Register ``callback(error)`` to run when startup fails."""
        self._error_callbacks.append(callback)

    def _setup_routes(self):
        """Define the HTTP/WebSocket handlers and register them on ``self.app``.

        The handlers were previously defined without ever being attached to
        the application. Paths follow the help page returned by ``root``
        (``/health``, ``/ws``, ``/ws/request/{request_id}``).
        """

        # Health-check endpoint.
        @self.app.get("/health")
        async def health_check():
            return {
                "status": "healthy",
                "timestamp": datetime.now().isoformat(),
                "connections": {
                    "main": len(self.connection_registry.main_connections),
                    "requests": len(self.connection_registry.request_connections),
                },
            }

        # The root path returns a simple HTML page.
        @self.app.get("/", response_class=HTMLResponse)
        async def root():
            html_content = """
<!DOCTYPE html>
<html>
<head>
<title>WebSocket Proxy Server</title>
<meta charset="utf-8">
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
.container { max-width: 800px; margin: 0 auto; }
.status { padding: 10px; border-radius: 5px; margin: 10px 0; }
.success { background-color: #d4edda; color: #155724; }
.info { background-color: #d1ecf1; color: #0c5460; }
code { background-color: #f8f9fa; padding: 2px 4px; border-radius: 3px; }
</style>
</head>
<body>
<div class="container">
<h1>WebSocket Proxy Server</h1>
<div class="status success">
✅ 服务器运行正常
</div>
<h2>连接信息</h2>
<div class="info">
<p><strong>主 WebSocket 连接:</strong> <code>ws://your-space-url/ws</code></p>
<p><strong>请求专用连接:</strong> <code>ws://your-space-url/ws/request/{request_id}</code></p>
<p><strong>健康检查:</strong> <code>/health</code></p>
</div>
<h2>使用说明</h2>
<ol>
<li>首先建立主 WebSocket 连接到 <code>/ws</code></li>
<li>发送 HTTP 请求到任意路径</li>
<li>服务器会通过主连接通知新请求</li>
<li>客户端需要建立请求专用连接到 <code>/ws/request/{request_id}</code></li>
<li>通过专用连接处理请求和响应</li>
</ol>
<h2>环境信息</h2>
<p>运行在 Hugging Face Spaces 环境中</p>
</div>
</body>
</html>
"""
            return HTMLResponse(content=html_content)

        # Main WebSocket route - receives initial requests.
        @self.app.websocket("/ws")
        async def main_websocket_endpoint(websocket: WebSocket):
            client_info = {
                "address": websocket.client.host if websocket.client else "unknown"
            }
            await self.connection_registry.add_main_connection(websocket, client_info)
            try:
                while True:
                    message = await websocket.receive_text()
                    await self.connection_registry.handle_main_message(message)
            except WebSocketDisconnect:
                self.logger.info("主WebSocket连接已关闭")
            except Exception as e:
                self.logger.error(f"主WebSocket处理错误: {str(e)}")
            finally:
                await self.connection_registry.remove_main_connection(websocket)

        # Request-specific WebSocket route - handles one request each.
        @self.app.websocket("/ws/request/{request_id}")
        async def request_websocket_endpoint(websocket: WebSocket, request_id: str):
            client_info = {
                "address": websocket.client.host if websocket.client else "unknown"
            }
            await self.connection_registry.add_request_connection(websocket, request_id, client_info)
            try:
                while True:
                    message = await websocket.receive_text()
                    await self.connection_registry.handle_request_message(message, request_id)
            except WebSocketDisconnect:
                self.logger.info(f"请求WebSocket连接已关闭 [ID: {request_id}]")
            except Exception as e:
                self.logger.error(f"请求WebSocket处理错误 [ID: {request_id}]: {str(e)}")
            finally:
                await self.connection_registry.remove_request_connection(websocket, request_id)

        # API route prefix, to avoid clashing with the root path.
        # NOTE(review): the "/api" prefix is inferred from the comment above;
        # confirm the intended path. The catch-all below proxies the same way.
        @self.app.api_route("/api/{path:path}", methods=self._PROXY_METHODS)
        async def api_proxy(request: Request, path: str):
            return await self.request_handler.process_request(request)

        # Wildcard route for all other HTTP requests (registered last so the
        # root page and health check take precedence).
        @self.app.api_route("/{path:path}", methods=self._PROXY_METHODS)
        async def catch_all(request: Request, path: str):
            # Never proxy the reserved paths.
            if path in ["", "health", "ws"] or path.startswith("ws/"):
                raise HTTPException(status_code=404, detail="Not Found")
            return await self.request_handler.process_request(request)

    async def start(self):
        """Run the uvicorn server until it exits.

        Started-callbacks run before serving begins. On failure the error is
        logged, passed to every error-callback and re-raised.
        """
        try:
            server_config = uvicorn.Config(
                app=self.app,
                host=self.config["host"],
                port=self.config["http_port"],
                log_level="info",
                access_log=True,
            )
            server = uvicorn.Server(server_config)
            self.logger.info(f"HTTP服务器启动: http://{self.config['host']}:{self.config['http_port']}")
            self.logger.info(f"主WebSocket服务器启动: ws://{self.config['host']}:{self.config['http_port']}/ws")
            self.logger.info(f"请求WebSocket服务器启动: ws://{self.config['host']}:{self.config['http_port']}/ws/request/{{request_id}}")
            self.logger.info("代理服务器系统启动完成 - 适配 Hugging Face Spaces")
            # Fire the started event.
            for callback in self._started_callbacks:
                callback()
            await server.serve()
        except Exception as error:
            self.logger.error(f"启动失败: {str(error)}")
            # Fire the error event.
            for callback in self._error_callbacks:
                callback(error)
            raise
# Startup function
async def initialize_server():
    """Create a ``ProxyServerSystem`` and run it.

    Any exception raised while starting is printed and then re-raised.
    """
    system = ProxyServerSystem()
    try:
        await system.start()
    except Exception as startup_error:
        print(f"服务器启动失败: {startup_error}")
        raise
# Program entry point
if __name__ == "__main__":
    # Build the coroutine first, then run it on a fresh event loop; Ctrl+C
    # ends the program with a short message instead of a traceback.
    server_coroutine = initialize_server()
    try:
        asyncio.run(server_coroutine)
    except KeyboardInterrupt:
        print("服务器已停止")