File size: 5,962 Bytes
c0b8476 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
from playwright.sync_api import sync_playwright
# Accumulates one record (URL, method, headers) per request captured by handle_request
all_request_headers_info = []
def handle_request(request):
    """Record the URL, HTTP method and headers of a captured request.

    The record is appended to the module-level ``all_request_headers_info``
    list so it can be inspected once the browsing session is over.
    """
    # Uncomment while debugging to trace every intercepted request:
    # print(f"Intercepted request to: {request.url}")
    entry = {
        "url": request.url,
        "method": request.method,
        # request.headers is a dict of header name -> value
        "headers": request.headers,
    }
    all_request_headers_info.append(entry)
def _is_cloudflare_challenge(title, html):
    """Return True when the title or page HTML carries a Cloudflare interstitial marker."""
    return (
        "请稍候…" in html
        or "Just a moment..." in html
        or "Cloudflare" in title
        or "Checking your browser" in title
    )


def _print_cookies(cookies):
    """Print name, value, domain and path of each cookie, or a notice when there are none."""
    print("\n--- Cookies ---")
    if not cookies:
        print("No cookies found.")
        return
    for cookie in cookies:
        print(
            f"Name: {cookie['name']}, Value: {cookie['value']}, Domain: {cookie['domain']}, Path: {cookie['path']}")


def _print_new_conversation_headers(records):
    """Print selected headers of every captured request to the conversations/new endpoint."""
    print("\n--- Request Headers (collected during the session) ---")
    if not records:
        print("No requests were intercepted (this is unlikely if the page loaded).")
        return
    # The capture list holds every resource request (HTML, CSS, JS, XHR, images, ...);
    # only the chat-creation call is reported.
    for record in records:
        if record['url'] != 'https://grok.com/rest/app-chat/conversations/new':
            continue
        headers = record['headers']
        # .get() keeps the report going (value None) when a header is absent
        # instead of aborting the whole run with a KeyError.
        datas = {
            'x-xai-request-id': headers.get('x-xai-request-id'),
            'x-statsig-id': headers.get('x-statsig-id'),
            'user-agent': headers.get('user-agent'),
        }
        print(datas)


def main():
    """Open grok.com in Chromium, submit a prompt, then print cookies and request headers.

    Steps:
      1. Launch a headed Chromium instance and open a page with a custom user agent.
      2. Register ``handle_request`` so every request made by the page is recorded.
      3. Navigate to https://grok.com/ and, if a Cloudflare challenge page is
         detected, wait up to 60 seconds for it to disappear (a screenshot is
         saved in every outcome).
      4. Fill the prompt textarea with "你好" and click the submit button.
      5. Print the context cookies and the selected headers of any captured
         request to the conversations/new endpoint.

    Errors raised after navigation starts are printed rather than propagated,
    and the browser is always closed in the ``finally`` clause.
    """
    with sync_playwright() as p:
        # Launch the browser (chromium here; firefox and webkit are also available).
        # headless=False shows the browser window; True runs without a UI.
        browser = p.chromium.launch(
            headless=False,
            args=[
                '--no-sandbox',
                '--disable-setuid-sandbox',
                # sometimes needed as well, though --shm-size is the better option
                '--disable-dev-shm-usage',
            ],
        )
        # A fresh browser context; user_agent, viewport, etc. can be set here.
        context = browser.new_context(
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0",
        )
        page = context.new_page()
        # The listener must be registered before navigating so that no request is missed;
        # the 'request' event fires for every request the page issues.
        page.on("request", handle_request)
        print("Navigating to https://grok.com/ ...")
        try:
            # Visit the target site with a 60 second timeout.
            page.goto("https://grok.com/", timeout=60000)
            print("Page loaded. Waiting for 10 seconds for dynamic content or further requests...")

            # Check whether we are still stuck on a Cloudflare challenge page.
            title = page.title()
            print(f"Page title: {title}")
            html = page.content()  # fetched once and reused for both marker checks
            if _is_cloudflare_challenge(title, html):
                print("Still on a Cloudflare challenge page. Waiting longer or trying interaction...")
                # A longer wait or simulated user interaction may be needed here;
                # for now wait until the challenge text is gone from <body>.
                try:
                    page.wait_for_selector("body:not(:has-text('请稍候…'))", timeout=60000)
                    print("Cloudflare challenge likely passed.")
                    title = page.title()
                    print(f"New page title: {title}")
                    page.screenshot(path="cf_passed.png")
                except Exception as e:
                    print(f"Failed to pass Cloudflare challenge after extended wait: {e}")
                    page.screenshot(path="cf_failed.png")
            else:
                print("Successfully navigated to the page.")
                page.screenshot(path="cf_success.png")

            page.wait_for_timeout(10000)

            # 1. Fill the prompt textarea, located by its accessible label.
            try:
                textarea_locator = page.get_by_label("向Grok提任何问题")
                textarea_locator.fill("你好")
                print("Successfully entered '你好' into the textarea.")
            except Exception as e:
                print(f"Could not find or fill the textarea with aria-label '向Grok提任何问题'. Error: {e}")
                # The finally clause below closes the browser; no explicit close needed here.
                return

            # 2. Click the submit button, located by role and accessible name.
            try:
                submit_button_locator = page.get_by_role("button", name="提交")
                submit_button_locator.click()
                print("Successfully clicked the '提交' button.")
            except Exception as e:
                print(f"Could not find or click the button with aria-label '提交'. Error: {e}")
                return

            # NOTE(review): there is no explicit wait after the click, so the
            # conversations/new request may not have been recorded yet when the
            # headers are printed — confirm, or wrap the click in
            # page.expect_request(...) if the report comes out empty.
            _print_cookies(context.cookies())
            _print_new_conversation_headers(all_request_headers_info)
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            # Make sure the browser is closed on every exit path.
            print("\nClosing browser...")
            browser.close()
# Run the capture session only when executed as a script, not on import.
if __name__ == "__main__":
    main()