broadfield-dev committed on
Commit
6c9da90
·
verified ·
1 Parent(s): 56976c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -108,7 +108,7 @@ class HTML_TO_MARKDOWN_CONVERTER:
108
  return f"\n\n![{alt}]({full_src})\n\n"
109
  return inner_md
110
 
111
- async def perform_web_browse(query: str, browser_name: str, search_engine_name: str):
112
  browser_key = browser_name.lower()
113
  if "playwright" not in PLAYWRIGHT_STATE:
114
  PLAYWRIGHT_STATE["playwright"] = await async_playwright().start()
@@ -126,11 +126,12 @@ async def perform_web_browse(query: str, browser_name: str, search_engine_name:
126
 
127
  browser_instance = PLAYWRIGHT_STATE[browser_key]
128
 
129
- is_direct_url = urllib.parse.urlparse(query).scheme in ['http', 'https'] and '.' in urllib.parse.urlparse(query).netloc
130
-
131
- if is_direct_url:
132
- url = query
133
- else:
 
134
  url_template = SEARCH_ENGINES.get(search_engine_name)
135
  if not url_template:
136
  return {"status": "error", "query": query, "error_message": f"Invalid search engine: '{search_engine_name}'."}
@@ -159,7 +160,7 @@ async def perform_web_browse(query: str, browser_name: str, search_engine_name:
159
  markdown_text = converter.convert()
160
  status_code = response.status if response else 0
161
 
162
- return {"status": "success", "query": query, "final_url": final_url, "page_title": title, "http_status": status_code, "proxy_used": proxy_server_used, "markdown_content": markdown_text}
163
  except Exception as e:
164
  error_message = str(e).splitlines()[0]
165
  if "Timeout" in error_message:
@@ -173,16 +174,17 @@ with gr.Blocks(title="Web Browse API", theme=gr.themes.Soft()) as demo:
173
  gr.Markdown("# Web Browse API")
174
  gr.Markdown(f"This interface exposes a stateless API endpoint (`/api/web_browse`) to fetch and parse web content. {REVOLVER.count()} proxies loaded.")
175
 
176
- query_input = gr.Textbox(label="URL or Search Query", placeholder="e.g., https://gradio.app or 'how does gradio work'")
 
177
 
178
  with gr.Row():
179
  browser_input = gr.Dropdown(label="Browser", choices=["firefox", "chromium", "webkit"], value="firefox", scale=1)
180
- search_engine_input = gr.Dropdown(label="Search Engine (for non-URL queries)", choices=sorted(list(SEARCH_ENGINES.keys())), value="DuckDuckGo", scale=2)
181
 
182
  submit_button = gr.Button("Browse", variant="primary")
183
  output_json = gr.JSON(label="API Result")
184
 
185
- submit_button.click(fn=perform_web_browse, inputs=[query_input, browser_input, search_engine_input], outputs=output_json, api_name="web_browse")
186
 
187
  if __name__ == "__main__":
188
  demo.launch()
 
108
  return f"\n\n![{alt}]({full_src})\n\n"
109
  return inner_md
110
 
111
+ async def perform_web_browse(action: str, query: str, browser_name: str, search_engine_name: str):
112
  browser_key = browser_name.lower()
113
  if "playwright" not in PLAYWRIGHT_STATE:
114
  PLAYWRIGHT_STATE["playwright"] = await async_playwright().start()
 
126
 
127
  browser_instance = PLAYWRIGHT_STATE[browser_key]
128
 
129
+ if action == "Scrape URL":
130
+ if not query.startswith(('http://', 'https://')):
131
+ url = f"http://{query}"
132
+ else:
133
+ url = query
134
+ else: # action == "Search"
135
  url_template = SEARCH_ENGINES.get(search_engine_name)
136
  if not url_template:
137
  return {"status": "error", "query": query, "error_message": f"Invalid search engine: '{search_engine_name}'."}
 
160
  markdown_text = converter.convert()
161
  status_code = response.status if response else 0
162
 
163
+ return {"status": "success", "query": query, "action": action, "final_url": final_url, "page_title": title, "http_status": status_code, "proxy_used": proxy_server_used, "markdown_content": markdown_text}
164
  except Exception as e:
165
  error_message = str(e).splitlines()[0]
166
  if "Timeout" in error_message:
 
174
  gr.Markdown("# Web Browse API")
175
  gr.Markdown(f"This interface exposes a stateless API endpoint (`/api/web_browse`) to fetch and parse web content. {REVOLVER.count()} proxies loaded.")
176
 
177
+ action_input = gr.Radio(label="Action", choices=["Search", "Scrape URL"], value="Search")
178
+ query_input = gr.Textbox(label="Query or URL", placeholder="e.g., 'best cat food' or 'www.wikipedia.org'")
179
 
180
  with gr.Row():
181
  browser_input = gr.Dropdown(label="Browser", choices=["firefox", "chromium", "webkit"], value="firefox", scale=1)
182
+ search_engine_input = gr.Dropdown(label="Search Engine (if action is Search)", choices=sorted(list(SEARCH_ENGINES.keys())), value="DuckDuckGo", scale=2)
183
 
184
  submit_button = gr.Button("Browse", variant="primary")
185
  output_json = gr.JSON(label="API Result")
186
 
187
+ submit_button.click(fn=perform_web_browse, inputs=[action_input, query_input, browser_input, search_engine_input], outputs=output_json, api_name="web_browse")
188
 
189
  if __name__ == "__main__":
190
  demo.launch()