import gradio as gr from selenium import webdriver from selenium.webdriver.chrome.service import Service from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.keys import Keys from lavague.ActionEngine import ActionEngine from lavague.defaults import DefaultLocalLLM, DefaultLLM from llama_index.llms.huggingface import HuggingFaceInferenceAPI MAX_CHARS = 1500 # Use this action_engine instead to have a local inference # action_engine = ActionEngine(llm=DefaultLocalLLM()) import os from llama_index.llms.azure_openai import AzureOpenAI api_key=os.getenv("AZURE_OPENAI_KEY") api_version="2023-05-15" azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") model = "gpt-4" deployment_name = "gpt-4-turbo" llm = AzureOpenAI( model=model, deployment_name=deployment_name, api_key=api_key, azure_endpoint=azure_endpoint, api_version=api_version, temperature=0.0 ) action_engine = ActionEngine(llm=llm) ## Setup chrome options chrome_options = Options() chrome_options.add_argument("--headless") # Ensure GUI is off chrome_options.add_argument("--no-sandbox") chrome_options.add_argument("--window-size=1600,900") # Set path to chrome/chromedriver as per your configuration import os.path homedir = os.path.expanduser("~") chrome_options.binary_location = "./chrome-linux64/chrome" webdriver_service = Service("./chromedriver-linux64/chromedriver") title = """
Redefining internet surfing by transforming natural language instructions into seamless browser interactions.
Success!
""" full_code += code except Exception as e: output = f"Error in code execution: {str(e)}" status = """Failure! Open the Debug tab for more information
""" return output, code, html, status, full_code def update_image_display(img): driver.save_screenshot("screenshot.png") url = driver.current_url return "screenshot.png", url def show_processing_message(): return "Processing..." def update_image_display(img): driver.save_screenshot("screenshot.png") url = driver.current_url return "screenshot.png", url base_url = "https://huggingface.co/" instructions = ["Click on the Datasets item on the menu, between Models and Spaces", "Click on the search bar 'Filter by name', type 'The Stack', and press 'Enter'", "Scroll by 500 pixels",] with gr.Blocks() as demo: with gr.Tab("LaVague"): with gr.Row(): gr.HTML(title) with gr.Row(): url_input = gr.Textbox(value=base_url, label="Enter URL and press 'Enter' to load the page.") with gr.Row(): with gr.Column(scale=7): image_display = gr.Image(label="Browser", interactive=False) with gr.Column(scale=3): with gr.Accordion(label="Full code", open=False): full_code = gr.Code(value="", language="python", interactive=False) code_display = gr.Code(label="Generated code", language="python", lines=5, interactive=True) status_html = gr.HTML() with gr.Row(): with gr.Column(scale=8): text_area = gr.Textbox(label="Enter instructions and press 'Enter' to generate code.") gr.Examples(examples=instructions, inputs=text_area) with gr.Tab("Debug"): with gr.Row(): with gr.Column(): log_display = gr.Textbox(interactive=False, lines=20) with gr.Column(): source_display = gr.Code(language="html", label="Retrieved nodes", interactive=False, lines=20) with gr.Row(): with gr.Accordion(label="Full HTML", open=False): full_html = gr.Code(language="html", label="Full HTML", interactive=False, lines=20) # Linking components url_input.submit(process_url, inputs=url_input, outputs=image_display) text_area.submit(show_processing_message, outputs=[status_html]).then( process_instruction, inputs=[text_area, url_input], outputs=[code_display, source_display] ).then( exec_code, inputs=[code_display, source_display, full_code], outputs=[log_display, code_display, full_html, status_html, full_code] ).then( update_image_display, inputs=image_display, outputs=[image_display, url_input] ) demo.launch(debug=True)