Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -108,7 +108,7 @@ class HTML_TO_MARKDOWN_CONVERTER:
|
|
108 |
return f"\n\n\n\n"
|
109 |
return inner_md
|
110 |
|
111 |
-
async def perform_web_browse(query: str, browser_name: str, search_engine_name: str):
|
112 |
browser_key = browser_name.lower()
|
113 |
if "playwright" not in PLAYWRIGHT_STATE:
|
114 |
PLAYWRIGHT_STATE["playwright"] = await async_playwright().start()
|
@@ -126,11 +126,12 @@ async def perform_web_browse(query: str, browser_name: str, search_engine_name:
|
|
126 |
|
127 |
browser_instance = PLAYWRIGHT_STATE[browser_key]
|
128 |
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
134 |
url_template = SEARCH_ENGINES.get(search_engine_name)
|
135 |
if not url_template:
|
136 |
return {"status": "error", "query": query, "error_message": f"Invalid search engine: '{search_engine_name}'."}
|
@@ -159,7 +160,7 @@ async def perform_web_browse(query: str, browser_name: str, search_engine_name:
|
|
159 |
markdown_text = converter.convert()
|
160 |
status_code = response.status if response else 0
|
161 |
|
162 |
-
return {"status": "success", "query": query, "final_url": final_url, "page_title": title, "http_status": status_code, "proxy_used": proxy_server_used, "markdown_content": markdown_text}
|
163 |
except Exception as e:
|
164 |
error_message = str(e).splitlines()[0]
|
165 |
if "Timeout" in error_message:
|
@@ -173,16 +174,17 @@ with gr.Blocks(title="Web Browse API", theme=gr.themes.Soft()) as demo:
|
|
173 |
gr.Markdown("# Web Browse API")
|
174 |
gr.Markdown(f"This interface exposes a stateless API endpoint (`/api/web_browse`) to fetch and parse web content. {REVOLVER.count()} proxies loaded.")
|
175 |
|
176 |
-
|
|
|
177 |
|
178 |
with gr.Row():
|
179 |
browser_input = gr.Dropdown(label="Browser", choices=["firefox", "chromium", "webkit"], value="firefox", scale=1)
|
180 |
-
search_engine_input = gr.Dropdown(label="Search Engine (
|
181 |
|
182 |
submit_button = gr.Button("Browse", variant="primary")
|
183 |
output_json = gr.JSON(label="API Result")
|
184 |
|
185 |
-
submit_button.click(fn=perform_web_browse, inputs=[query_input, browser_input, search_engine_input], outputs=output_json, api_name="web_browse")
|
186 |
|
187 |
if __name__ == "__main__":
|
188 |
demo.launch()
|
|
|
108 |
return f"\n\n\n\n"
|
109 |
return inner_md
|
110 |
|
111 |
+
async def perform_web_browse(action: str, query: str, browser_name: str, search_engine_name: str):
|
112 |
browser_key = browser_name.lower()
|
113 |
if "playwright" not in PLAYWRIGHT_STATE:
|
114 |
PLAYWRIGHT_STATE["playwright"] = await async_playwright().start()
|
|
|
126 |
|
127 |
browser_instance = PLAYWRIGHT_STATE[browser_key]
|
128 |
|
129 |
+
if action == "Scrape URL":
|
130 |
+
if not query.startswith(('http://', 'https://')):
|
131 |
+
url = f"http://{query}"
|
132 |
+
else:
|
133 |
+
url = query
|
134 |
+
else: # action == "Search"
|
135 |
url_template = SEARCH_ENGINES.get(search_engine_name)
|
136 |
if not url_template:
|
137 |
return {"status": "error", "query": query, "error_message": f"Invalid search engine: '{search_engine_name}'."}
|
|
|
160 |
markdown_text = converter.convert()
|
161 |
status_code = response.status if response else 0
|
162 |
|
163 |
+
return {"status": "success", "query": query, "action": action, "final_url": final_url, "page_title": title, "http_status": status_code, "proxy_used": proxy_server_used, "markdown_content": markdown_text}
|
164 |
except Exception as e:
|
165 |
error_message = str(e).splitlines()[0]
|
166 |
if "Timeout" in error_message:
|
|
|
174 |
gr.Markdown("# Web Browse API")
|
175 |
gr.Markdown(f"This interface exposes a stateless API endpoint (`/api/web_browse`) to fetch and parse web content. {REVOLVER.count()} proxies loaded.")
|
176 |
|
177 |
+
action_input = gr.Radio(label="Action", choices=["Search", "Scrape URL"], value="Search")
|
178 |
+
query_input = gr.Textbox(label="Query or URL", placeholder="e.g., 'best cat food' or 'www.wikipedia.org'")
|
179 |
|
180 |
with gr.Row():
|
181 |
browser_input = gr.Dropdown(label="Browser", choices=["firefox", "chromium", "webkit"], value="firefox", scale=1)
|
182 |
+
search_engine_input = gr.Dropdown(label="Search Engine (if action is Search)", choices=sorted(list(SEARCH_ENGINES.keys())), value="DuckDuckGo", scale=2)
|
183 |
|
184 |
submit_button = gr.Button("Browse", variant="primary")
|
185 |
output_json = gr.JSON(label="API Result")
|
186 |
|
187 |
+
submit_button.click(fn=perform_web_browse, inputs=[action_input, query_input, browser_input, search_engine_input], outputs=output_json, api_name="web_browse")
|
188 |
|
189 |
if __name__ == "__main__":
|
190 |
demo.launch()
|