Spaces:
Running
Running
from pathlib import Path | |
import traceback | |
from loguru import logger | |
from playwright.async_api import BrowserContext as ASyncContext, async_playwright | |
import hcaptcha_challenger as solver | |
from hcaptcha_challenger.agents import AgentT, Malenia | |
# Initialise the local side of the ModelHub (upgrade enabled, CLIP enabled).
solver.install(upgrade=True, clip=True)

# Dataset output directory: a "tmp_dir" folder located next to this file.
tmp_dir = Path(__file__).parent / "tmp_dir"
async def hit_challenge(context: ASyncContext, host, sitekey, user_data_dir, times: int = 8):
    """Run the hCaptcha agent against a stub page until it succeeds or gives up.

    All requests of ``context`` are routed through ``route_continuation`` so
    that ``https://{host}/`` is answered with a page embedding ``sitekey``.

    Args:
        context: Playwright browser context; its first page is used.
        host: Host name that is opened as ``https://{host}``.
        sitekey: Site key forwarded to ``route_continuation``.
        user_data_dir: Accepted from the caller but not read in this function.
        times: Upper bound of the attempt counter. The loop iterates over
            ``range(1, times)``, i.e. at most ``times - 1`` attempts.

    Returns:
        The ``generated_pass_UUID`` entry of ``agent.cr`` once an attempt
        reports ``CHALLENGE_SUCCESS`` (the context is closed first), or
        ``None`` when no attempt succeeds.
    """

    async def _intercept(route, request):
        # Bind host/sitekey so the router can recognise and replace the page.
        await route_continuation(route, request, host, sitekey)

    await context.route('**/*', _intercept)

    page = context.pages[0]
    agent = AgentT.from_page(page=page, tmp_dir=tmp_dir, self_supervised=True)

    await page.goto(f"https://{host}")
    await agent.handle_checkbox()

    for attempt in range(1, times):
        outcome = await agent()
        print(f">> {attempt} - Challenge Result: {outcome}")

        if outcome == agent.status.CHALLENGE_BACKCALL:
            # Pause briefly, then click the refresh button in the challenge frame.
            await page.wait_for_timeout(500)
            challenge_frame = page.frame_locator(agent.HOOK_CHALLENGE)
            await challenge_frame.locator("//div[@class='refresh button']").click()
        elif outcome == agent.status.CHALLENGE_SUCCESS:
            # Snapshot the response attributes before the context goes away.
            response_fields = vars(agent.cr)
            await context.close()
            return response_fields["generated_pass_UUID"]

    # Every attempt was used without a success.
    return None
async def route_continuation(route, request, host, sitekey):
    """Answer the request for ``https://{host}/`` with an hCaptcha demo page.

    Any request whose URL is not exactly ``https://{host}/`` is passed through
    unchanged via ``route.continue_()``. The matching request is fulfilled with
    status 200 and an inline HTML page whose ``data-sitekey`` placeholder has
    been replaced by ``sitekey``.

    Args:
        route: Object providing awaitable ``fulfill`` and ``continue_`` methods.
        request: Object exposing the request ``url``.
        host: Host name whose root URL is intercepted.
        sitekey: Value substituted for the placeholder in the page.
    """
    # Only the root URL of the target host is intercepted.
    if request.url != f"https://{host}/":
        # Requests for anything else go through untouched.
        await route.continue_()
        return

    print("start to solve")

    # Serve a locally built page instead of the site's real response.
    page_template = """
<!DOCTYPE html>
<html lang="en">
<head>
<title>hCAPTCHA 演示</title>
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, user-scalable=yes">
<script src="https://js.hcaptcha.com/1/api.js" type="text/javascript" async defer></script>
</head>
<body>
<br><br>
<div class="sample-form">
<form id="hcaptcha-demo-form" method="POST">
<div id="hcaptcha-demo" class="h-captcha" data-sitekey="%%%%%%%%%%%" data-callback="onSuccess" data-expired-callback="onExpire"></div>
<script>
// success callback
var onSuccess = function(response) {
var errorDivs = document.getElementsByClassName("hcaptcha-error");
if (errorDivs.length) {
errorDivs[0].className = "";
}
var errorMsgs = document.getElementsByClassName("hcaptcha-error-message");
if (errorMsgs.length) {
errorMsgs[0].parentNode.removeChild(errorMsgs[0]);
}
var logEl = document.querySelector(".hcaptcha-success");
logEl.innerHTML = "挑战成功!"
};
var onExpire = function(response) {
var logEl = document.querySelector(".hcaptcha-success");
logEl.innerHTML = "令牌已过期。"
};
</script>
<div class="hcaptcha-success smsg" aria-live="polite"></div>
</body>
<script type="text/javascript">
// beacon example
function addEventHandler(object,szEvent,cbCallback){
if(!!object.addEventListener){ // for modern browsers or IE9+
return object.addEventListener(szEvent,cbCallback);
}
if(!!object.attachEvent){ // for IE <=8
return object.attachEvent(szEvent,cbCallback);
}
};
// Ex: triggers pageview beacon
addEventHandler(window,'load',function(){b();});
// Ex: triggers event beacon without pageview
addEventHandler(window,'load',function(){b({"vt": "e", "ec": "test_cat", "ea": "test_action"});});
</script>
</html>
"""
    page_html = page_template.replace("%%%%%%%%%%%", sitekey)
    await route.fulfill(status=200, body=page_html)
async def bytedance(host, sitekey, user_data_dirs):
    """Launch headless Firefox with a persistent profile and solve the challenge.

    Args:
        host: Host name forwarded to ``hit_challenge``.
        sitekey: Site key forwarded to ``hit_challenge``.
        user_data_dirs: Profile directory, resolved relative to this file's
            folder.

    Returns:
        Whatever ``hit_challenge`` returns on a normal run. If any exception
        is raised, the formatted traceback text is returned instead (the
        exception is printed, not propagated).
    """
    print(user_data_dirs)
    # playwright install firefox --with-deps
    try:
        profile_dir = Path(__file__).parent.joinpath(user_data_dirs)
        async with async_playwright() as p:
            context = await p.firefox.launch_persistent_context(
                user_data_dir=profile_dir,
                headless=True,
                locale="en-US",
            )
            try:
                await Malenia.apply_stealth(context)
                return await hit_challenge(context, host, sitekey, profile_dir)
            finally:
                # Close here, where `context` is guaranteed to exist and
                # Playwright is still running. Doing it in the outer handler
                # would hit an unbound name when the launch itself failed and
                # would run after Playwright had already been shut down.
                await context.close()
    except Exception as e:
        traceback.print_exc()
        print(e)
        return traceback.format_exc()