Spaces:
Running
Running
from playwright.sync_api import sync_playwright | |
# 用于存储捕获到的请求头信息 | |
all_request_headers_info = [] | |
def handle_request(request): | |
""" | |
捕获每个请求的URL, 方法和头部信息 | |
""" | |
# print(f"Intercepted request to: {request.url}") # 调试时可以取消注释 | |
all_request_headers_info.append({ | |
"url": request.url, | |
"method": request.method, | |
"headers": request.headers # request.headers 是一个字典 | |
}) | |
def main(): | |
with sync_playwright() as p: | |
# 启动浏览器,可以是 chromium, firefox, or webkit | |
# headless=False 可以看到浏览器操作,True则为无头模式 | |
browser = p.chromium.launch(headless=True, | |
args=[ | |
'--no-sandbox', | |
'--disable-setuid-sandbox', | |
'--disable-dev-shm-usage' # 有时也需要这个,但 --shm-size 更好 | |
]) | |
# 创建一个新的浏览器上下文 | |
# 可以在这里设置 user_agent, viewport, etc. | |
context = browser.new_context( | |
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0", | |
) | |
# 在上下文中创建一个新页面 | |
page = context.new_page() | |
# 注册请求拦截器,这必须在导航之前完成 | |
# 'request' 事件会在每个HTTP请求发起时触发 | |
page.on("request", handle_request) | |
print(f"Navigating to https://grok.com/ ...") | |
try: | |
# 访问目标网站,设置一个合理的超时时间(例如60秒) | |
page.goto("https://grok.com/", timeout=60000) | |
page.wait_for_timeout(5000) | |
print("Page loaded. Waiting for 10 seconds for dynamic content or further requests...") | |
# 检查是否仍然被 Cloudflare 阻止 (例如,查找特定的标题或元素) | |
title = page.title() | |
print(f"Page title: {title}") | |
if "请稍候…" in page.content() or "Just a moment..." in page.content() or "Cloudflare" in title or "Checking your browser" in title: | |
print("Still on a Cloudflare challenge page. Waiting longer or trying interaction...") | |
# 你可能需要在这里添加更长的等待或模拟用户交互 | |
# 例如,等待特定的元素出现,表明挑战已通过 | |
try: | |
page.wait_for_selector("body:not(:has-text('请稍候…'))", timeout=60000) | |
print("Cloudflare challenge likely passed.") | |
title = page.title() | |
print(f"New page title: {title}") | |
page.screenshot(path="cf_passed.png") | |
except Exception as e: | |
print(f"Failed to pass Cloudflare challenge after extended wait: {e}") | |
page.screenshot(path="cf_failed.png") | |
else: | |
print("Successfully navigated to the page.") | |
page.screenshot(path="cf_success.png") | |
page.evaluate(""" | |
function(){ | |
const element = document.getElementById('turnstile-widget'); | |
if (element) { | |
element.style.display = 'none'; | |
} | |
} | |
""") | |
page.wait_for_timeout(10000) | |
try: | |
textarea_locator = page.get_by_label("Ask Grok anything") | |
textarea_locator.fill("你好") | |
print("Successfully entered '你好' into the textarea.") | |
except Exception as e: | |
print(f"Could not find or fill the textarea with aria-label 'Ask Grok anything'. Error: {e}") | |
browser.close() | |
return | |
# 2. 查找 aria-label 为“提交”的 button 并点击 | |
# 使用 get_by_role('button', name='...') 是 Playwright 推荐的方式来查找具有特定可访问名称的按钮 | |
try: | |
submit_button_locator = page.get_by_role("button", name="Submit") | |
submit_button_locator.click() | |
print("Successfully clicked the 'Submit' button.") | |
except Exception as e: | |
print(f"Could not find or click the button with aria-label 'Submit'. Error: {e}") | |
browser.close() | |
return | |
# 等待10秒 | |
# Playwright 的 page.wait_for_timeout() 是首选,因为它与Playwright的事件循环集成 | |
# page.wait_for_timeout(10000) | |
# 或者使用 time.sleep(10) 也可以,但在Playwright脚本中前者更佳 | |
print("\n--- Cookies ---") | |
# 获取当前上下文中的所有cookies | |
cookies = context.cookies() | |
if cookies: | |
for cookie in cookies: | |
print( | |
f"Name: {cookie['name']}, Value: {cookie['value']}, Domain: {cookie['domain']}, Path: {cookie['path']}") | |
else: | |
print("No cookies found.") | |
print("\n--- Request Headers (collected during the session) ---") | |
if all_request_headers_info: | |
# 打印捕获到的每个请求的头部信息 | |
# 注意:这里会包含所有资源的请求(HTML, CSS, JS, XHR, 图片等) | |
for i, req_info in enumerate(all_request_headers_info): | |
if req_info['url'] == 'https://grok.com/rest/app-chat/conversations/new': | |
datas = { | |
'x-xai-request-id': req_info['headers']['x-xai-request-id'], | |
'x-statsig-id': req_info['headers']['x-statsig-id'], | |
'user-agent': req_info['headers']['user-agent'], | |
} | |
print(datas) | |
return datas | |
else: | |
print("No requests were intercepted (this is unlikely if the page loaded).") | |
except Exception as e: | |
print(f"An error occurred: {e}") | |
finally: | |
# 确保浏览器关闭 | |
print("\nClosing browser...") | |
page.close() | |
browser.close() | |
return None | |
if __name__ == "__main__": | |
main() |