import asyncio
import json
import os
import subprocess
import sys

import gradio as gr
import nest_asyncio
|
|
| |
def install_playwright(browsers_path='/home/user/pw-browsers'):
    """Make sure Playwright's Chromium build is installed under *browsers_path*.

    Sets the ``PLAYWRIGHT_BROWSERS_PATH`` environment variable to
    *browsers_path*. When that path does not exist yet, runs
    ``playwright install chromium`` followed by
    ``playwright install-deps chromium``; otherwise nothing is installed.

    This is best effort: any failure is printed as a warning and swallowed so
    the caller can continue.

    Args:
        browsers_path: Directory Playwright should use for its browsers.
            Defaults to the location this app has always used.
    """
    try:
        os.environ['PLAYWRIGHT_BROWSERS_PATH'] = browsers_path

        if os.path.exists(browsers_path):
            print("✅ Browser already exists.")
            return

        print("⏳ Installing Chromium browser... Please wait...")
        # Run Playwright with the interpreter that is executing this module;
        # a bare "python3" on PATH may be a different environment that does
        # not have playwright installed.
        python = sys.executable or "python3"
        for command in ("install", "install-deps"):
            subprocess.run(
                [python, "-m", "playwright", command, "chromium"],
                check=True,
            )
        print("✅ Browser installed successfully!")
    except Exception as e:
        # Deliberately broad: installation problems must not abort start-up.
        print(f"⚠️ Installation warning: {e}")
|
|
| |
# Run the browser bootstrap before crawl4ai is imported.
# NOTE(review): presumably this ordering exists so PLAYWRIGHT_BROWSERS_PATH is
# already set when crawl4ai / Playwright are loaded - confirm before moving
# the import to the top of the file.
install_playwright()

from crawl4ai import (  # noqa: E402 - intentionally imported after the install step
    AsyncWebCrawler,
    BrowserConfig,
    CrawlerRunConfig,
    CacheMode,
    LLMConfig,
    LLMExtractionStrategy
)

# Patch asyncio so event loops may be re-entered (nested run_until_complete).
nest_asyncio.apply()
|
|
def _format_extracted_content(content):
    """Pretty-print *content* as JSON when it parses, else return it as-is."""
    try:
        parsed = json.loads(content)
    except (TypeError, ValueError):
        # TypeError: content is None / not a string. ValueError (which
        # includes json.JSONDecodeError): content is not valid JSON.
        return content
    # ensure_ascii=False keeps non-ASCII text (e.g. Bengali) readable instead
    # of turning it into \uXXXX escapes.
    return json.dumps(parsed, indent=2, ensure_ascii=False)


async def extract_with_gemini(url, api_key, prompt):
    """Crawl *url* and extract data from it with a Gemini-backed LLM strategy.

    Args:
        url: Address of the page to crawl.
        api_key: Gemini API key handed to the LLM configuration.
        prompt: Instruction given to the extraction strategy.

    Returns:
        The extracted content, pretty-printed when it is valid JSON and
        returned unchanged otherwise. If the URL or API key is missing or
        blank, the crawl reports failure, or an exception is raised while
        crawling, a user-facing error message is returned instead.
    """
    # Surrounding whitespace is never meaningful here, and a whitespace-only
    # value must be treated as missing.
    if isinstance(url, str):
        url = url.strip()
    if isinstance(api_key, str):
        api_key = api_key.strip()
    if not url or not api_key:
        return "⚠️ অনুগ্রহ করে URL এবং Gemini API Key দিন।"

    # Headless Chromium with GPU, /dev/shm usage and sandboxing switched off.
    # NOTE(review): these flags look like the usual container set-up - confirm
    # against the deployment environment.
    browser_config = BrowserConfig(
        headless=True,
        extra_args=[
            "--disable-gpu",
            "--disable-dev-shm-usage",
            "--no-sandbox",
            "--disable-setuid-sandbox",
        ],
    )

    llm_config = LLMConfig(
        provider="gemini/gemini-2.5-flash",
        api_token=api_key,
    )

    extraction_strategy = LLMExtractionStrategy(
        llm_config=llm_config,
        instruction=prompt,
        verbose=True,
    )

    run_config = CrawlerRunConfig(
        extraction_strategy=extraction_strategy,
        cache_mode=CacheMode.BYPASS,
    )

    try:
        async with AsyncWebCrawler(config=browser_config) as crawler:
            result = await crawler.arun(url=url, config=run_config)
            if not result.success:
                return f"❌ এরর: {result.error_message}"
            return _format_extracted_content(result.extracted_content)
    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"❌ রানটাইম এরর: {str(e)}"
|
|
def gradio_wrapper(url, api_key, prompt):
    """Synchronous adapter that runs ``extract_with_gemini`` to completion.

    A fresh event loop is created for each call, installed as the current
    loop of the calling thread while the coroutine runs, and torn down
    afterwards.

    Args:
        url: Address of the page to crawl.
        api_key: Gemini API key.
        prompt: Extraction instruction.

    Returns:
        Whatever ``extract_with_gemini`` returns.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(extract_with_gemini(url, api_key, prompt))
    finally:
        # Unregister the loop before closing it so the thread is not left
        # with a closed loop as its "current" event loop.
        asyncio.set_event_loop(None)
        loop.close()
|
|
| |
# UI layout: inputs and the start button in the left column, the result
# viewer in the right column.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Crawl4AI + Gemini AI Extractor")
    with gr.Row():
        with gr.Column():
            url_input = gr.Textbox(label="Website URL", placeholder="https://example.com")
            api_key = gr.Textbox(label="Gemini API Key", type="password")
            instruction = gr.Textbox(label="কী বের করতে চান?", lines=4)
            btn = gr.Button("🚀 শুরু করুন", variant="primary")
        with gr.Column():
            output_text = gr.Code(label="Result", language="json")

    # The click handler is registered inside the Blocks context, next to the
    # components it wires together.
    btn.click(fn=gradio_wrapper, inputs=[url_input, api_key, instruction], outputs=output_text)


if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)