github-actions[bot] committed
Commit 2a38ff3 · 1 parent: b4a1e45
Update from GitHub Actions
main.py
CHANGED
```diff
@@ -8,7 +8,7 @@ import uvicorn
 import asyncio
 from urllib.parse import urlparse
 import time
-from curl_cffi import requests
+from curl_cffi import requests
 from dotenv import load_dotenv
 
 # Load .env files from multiple locations
@@ -16,7 +16,7 @@ def load_env_files():
     # Try to load the .env in the root directory first
     if os.path.exists('.env'):
         load_dotenv('.env')
-
+
     # Then try to load /root/deploy/.scraper.env
     deploy_env_path = os.path.join('/root/deploy', '.scraper.env')
     if os.path.exists(deploy_env_path):
@@ -44,6 +44,15 @@ async def stream_generator(response):
     """Generator function for streaming responses"""
     for chunk in response.iter_content(chunk_size=8192):
         if chunk:
+            # Print the stream content as it is received
+            try:
+                # Try to decode the chunk as text and print it
+                decoded_chunk = chunk.decode('utf-8', errors='replace')
+                print(f"Received stream chunk [size: {len(chunk)} bytes]: {decoded_chunk}")
+            except Exception as e:
+                # If decoding fails, print the binary size and the error
+                print(f"Received stream chunk [size: {len(chunk)} bytes]: (could not decode as text: {str(e)})")
+
             yield chunk
             await asyncio.sleep(0.001)  # Yield control to preserve async behavior
 
@@ -55,7 +64,7 @@ async def make_request(method: str, **kwargs):
     print(f"Starting request using {REQUEST_LIB}")
     if REQUEST_LIB == 'cloudscraper':
         scraper = cloudscraper.create_scraper(delay=10)
-
+
         # Configure the proxy
         proxy = os.environ.get('PROXY')
         if proxy:
@@ -63,21 +72,21 @@ async def make_request(method: str, **kwargs):
                 'http': proxy,
                 'https': proxy
             }
-
+
         # Send the request according to the HTTP method
         return getattr(scraper, method.lower())(**kwargs)
     else:
         # Use curl_cffi
         proxy = os.environ.get('PROXY')
         proxies = {'http': proxy, 'https': proxy} if proxy else None
-
+
         # Request configuration for curl_cffi
         request_config = {
             **kwargs,
             'proxies': proxies,
             'impersonate': 'chrome110',
         }
-
+
         return requests.request(method, **request_config)
 
 @app.get("/", response_class=HTMLResponse)
@@ -106,7 +115,7 @@ async def proxy(request: Request):
                 detail="No valid x-ip-token header provided",
                 headers={"WWW-Authenticate": "x-ip-token"}
             )
-
+
         # Validate the token
         if auth_header != env_token:
             raise HTTPException(
@@ -118,26 +127,26 @@ async def proxy(request: Request):
         target_url = request.query_params.get("url")
         if not target_url:
             raise HTTPException(status_code=400, detail="A target URL must be provided")
-
+
         # Get home_url
         home_url = request.query_params.get("home")
         if not home_url:
             # Derive home_url from target_url
             parsed_url = urlparse(target_url)
             home_url = f"{parsed_url.scheme}://{parsed_url.netloc}/"
-
+
         # Check whether a streaming response was requested
         stream_request = "stream" in request.query_params and request.query_params["stream"].lower() in ["true", "1", "yes"]
-
+
         # Read the request body
         body = await request.body()
-
+
         # Collect the query parameters
         params = dict(request.query_params)
         # Remove the url and stream parameters from the query
         params.pop("url", None)
         params.pop("stream", None)
-
+
         # Capture the original request headers
         headers = dict(request.headers)
         # Remove headers that may cause problems
@@ -151,7 +160,7 @@ async def proxy(request: Request):
         headers.pop("host", None)
         headers.pop("referer", None)
         print(f"{headers}")
-
+
         # Build the request arguments
         request_kwargs = {
             "url": target_url,
@@ -159,14 +168,14 @@ async def proxy(request: Request):
             "params": params,
             "stream": stream_request  # Set the stream flag
         }
-
+
         # If there is a request body, add it to the request arguments
         if body:
             request_kwargs["data"] = body
-
+
         # Send the request through the unified request helper
         response = await make_request(request.method, **request_kwargs)
-
+
         # Handle streaming responses
         if stream_request:
             # Build the response-header dict
@@ -174,7 +183,7 @@ async def proxy(request: Request):
             for header_name, header_value in response.headers.items():
                 if header_name.lower() not in ('content-encoding', 'transfer-encoding', 'content-length'):
                     headers_dict[header_name] = header_value
-
+
             # Return the streaming response
             return StreamingResponse(
                 stream_generator(response),
@@ -188,18 +197,18 @@ async def proxy(request: Request):
             content=response.content,
             status_code=response.status_code,
         )
-
+
         # Forward the response headers
         for header_name, header_value in response.headers.items():
             if header_name.lower() not in ('content-encoding', 'transfer-encoding', 'content-length'):
                 proxy_response.headers[header_name] = header_value
-
+
         # Forward cookies
         for cookie_name, cookie_value in response.cookies.items():
             proxy_response.set_cookie(key=cookie_name, value=cookie_value)
-
+
         return proxy_response
-
+
     except Exception as e:
         error = f"Proxy request failed: {str(e)}"
         print(error)
```
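The only substantive change in this commit is the per-chunk debug logging added to `stream_generator`; the other hunks appear to touch whitespace alone (their removed and added lines render identically). Below is a minimal sketch, not part of the commit, of how the new logging behaves when driven by a stubbed response object. `FakeResponse` and `demo` are hypothetical names, and the import assumes `main.py` can be imported without side effects:

```python
import asyncio

from main import stream_generator  # the generator patched in this commit

class FakeResponse:
    """Hypothetical stand-in for a cloudscraper/curl_cffi response."""
    def iter_content(self, chunk_size=8192):
        yield b"hello world"  # valid UTF-8; printed as text
        yield b"\xff\xfe"     # invalid UTF-8; errors='replace' substitutes U+FFFD

async def demo():
    async for chunk in stream_generator(FakeResponse()):
        pass  # each chunk is already printed by the new debug logging

asyncio.run(demo())
```

Worth noting: `bytes.decode('utf-8', errors='replace')` does not raise on malformed input, so the `except` branch in the new code is purely defensive.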
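A hedged sketch of calling the `make_request` helper shown in the diff; the target URL is a placeholder. As written, neither branch of the helper awaits anything, so despite the `async` signature the underlying HTTP call blocks the event loop for its duration:

```python
import asyncio

from main import make_request  # the unified request helper from the diff

async def demo():
    # https://example.com/ is a placeholder target
    resp = await make_request(
        "GET",
        url="https://example.com/",
        headers={"accept": "text/html"},
        stream=False,
    )
    print(resp.status_code, len(resp.content))

asyncio.run(demo())
```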
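The stream flag in the proxy handler accepts several spellings; a quick illustration of the parsing rule (`wants_stream` is an illustrative name, not part of the commit). Values are case-insensitive, and anything outside the list disables streaming:

```python
def wants_stream(query_params: dict) -> bool:
    # Mirrors the check in the proxy handler: the parameter must be present
    # AND spell one of the accepted truthy values.
    return "stream" in query_params and query_params["stream"].lower() in ["true", "1", "yes"]

assert wants_stream({"stream": "TRUE"}) is True
assert wants_stream({"stream": "0"}) is False
assert wants_stream({}) is False
```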
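For completeness, a hypothetical client-side call against the proxy endpoint. The `/proxy` path and port are assumptions, since the route decorator and server config sit outside this diff, and `TOKEN` is a placeholder that must match the server's expected `x-ip-token` value. The `requests` used here is the standard client library, not the `curl_cffi` shim imported in `main.py`:

```python
import requests  # the standard requests library, client side

TOKEN = "..."  # placeholder; must match the server-side token

resp = requests.get(
    "http://localhost:8000/proxy",  # assumed mount point and port
    params={"url": "https://example.com/api", "stream": "true"},
    headers={"x-ip-token": TOKEN},
    stream=True,
)
for chunk in resp.iter_content(chunk_size=8192):
    print(len(chunk))  # consume the proxied stream chunk by chunk
```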
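Both response paths in the handler drop the same three headers before forwarding, and for the same reason: `content-encoding`, `transfer-encoding`, and `content-length` describe the wire format of the upstream response, which no longer matches the body the proxy re-emits. A standalone sketch of that filter (`forwardable_headers` is an illustrative name, not part of the commit):

```python
# Headers describing the upstream wire encoding; they would be wrong for
# the re-encoded body the proxy sends, so they are never forwarded.
EXCLUDED_HEADERS = ('content-encoding', 'transfer-encoding', 'content-length')

def forwardable_headers(upstream_headers):
    """Return only the headers that are safe to copy onto the proxy response."""
    return {
        name: value
        for name, value in upstream_headers.items()
        if name.lower() not in EXCLUDED_HEADERS
    }
```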