# NOTE(review): "Spaces: Sleeping Sleeping" is Hugging Face Spaces page residue
# from the copy/paste, not Python code — commented out so the module parses.
# Standard library
import os
from typing import Any, Dict

# Third-party
import requests
import streamlit as st
from dotenv import load_dotenv

# Local
from models import LinkNode, Status

# Load environment variables (e.g. BASE_URI) from a local .env file.
load_dotenv()
def display_map(link_map: Dict[str, Any]) -> None:
    """Render the entire link map as collapsible Streamlit sections.

    Args:
        link_map: Mapping of href -> serialized ``LinkNode`` dict, as returned
            by the crawl API. Entries that fail Pydantic validation are
            reported with ``st.error`` and skipped.
    """
    st.header("π Full Exploration Map")
    if not link_map:
        st.info("The exploration map is empty.")
        return

    # Re-hydrate the raw dicts into LinkNode models, skipping bad entries.
    validated_map = {}
    for href, dict_node in link_map.items():
        try:
            validated_map[href] = LinkNode.model_validate(dict_node)
        except Exception as e:
            st.error(f"Failed to validate data for {href}. Skipping. Error: {e}")

    # Shallow pages first, so the map reads top-down from the start URL.
    sorted_map = sorted(validated_map.items(), key=lambda item: item[1].depth)
    for href, node in sorted_map:
        st.divider()
        st.subheader(f"π [{href}]({href})")
        if node.parent:
            st.caption(f"Found on: {node.parent}")

        # One colored banner per relevance status (see models.Status).
        status = node.overview.status
        if status == Status.RELEVANT:
            st.success("**Status: RELEVANT** β ")
        elif status == Status.IRRELEVANT:
            st.warning("**Status: IRRELEVANT** β οΈ - Page deemed not relevant to search criteria.")
        elif status == Status.FAILED:
            st.error("**Status: FAILED** β - Could not scrape or analyze this page.")
        else:
            st.info("**Status: UNKNOWN** π‘")

        st.markdown("**π Summary**")
        st.info(node.overview.summary)

        with st.expander("View Full Extracted Data and Found Links"):
            st.markdown("##### π Full Extracted Data")
            overview_data = node.overview.model_dump()
            # Fixed display order; keys with falsy/missing values are omitted.
            display_order = ['details', 'required_docs', 'price', 'SLA']
            items_to_display = [
                (key.replace('_', ' ').capitalize(), str(overview_data[key]))
                for key in display_order
                if overview_data.get(key)
            ]
            for i, (title, value) in enumerate(items_to_display):
                st.markdown(f"**{title}**")
                st.markdown(value)
                if i < len(items_to_display) - 1:
                    st.markdown("---")  # separator between items, not after the last

            st.markdown("##### π Links Found on This Page")
            if node.child:
                st.write(f"Found **{len(node.child)}** link(s):")
                links_text = "\n".join(f"- {link}" for link in node.child)
                # Unique widget key per href so Streamlit state doesn't collide.
                st.text_area("Links", links_text, height=150, key=f"links_{href}")
            else:
                st.write("No valid links were found on this page.")
def main() -> None:
    """Streamlit entry point: collect settings, call the crawl API, render results.

    Posts ``{url, max_depth}`` to ``$BASE_URI/scrape`` and feeds the returned
    ``link_map`` to :func:`display_map`, then prints accumulated token usage.
    """
    st.title("π€ Browser Agent: Visa Data Extractor (Streamlit Demo)")
    st.markdown("Enter an API Key and a URL to start a recursive web crawl for structured visa information.")

    with st.sidebar:
        st.header("Configuration")
        default_url = "https://www.netherlandsworldwide.nl/visa-the-netherlands/visa-application-form"
        url = st.text_input("Starting URL (e.g., website.com)", default_url)
        max_depth = st.slider("Max Exploration Depth", min_value=1, max_value=5, value=1)
        st.markdown("""
        **Note:** Depth 1 is fast. Depth 2 or 3 can be **very slow** and consume many tokens.
        """)

    # --- Main Execution ---
    if st.button("Start Exploration and Extraction"):
        print(f"starting crawl for {url} with depth {max_depth}")
        if not url:
            st.error("Please enter a valid Starting URL.")
            return
        with st.spinner(f"Crawling {url} up to depth {max_depth}... (This may take a while)"):
            BASE_URI = os.getenv("BASE_URI", "http://localhost:5000")
            print(f"{BASE_URI}/scrape")
            try:
                result = requests.post(
                    f"{BASE_URI}/scrape",
                    headers={"Content-Type": "application/json"},
                    json={
                        "url": url,
                        "max_depth": max_depth,
                    },
                    # Deep crawls are slow, but bound the wait instead of
                    # hanging the UI forever (requests has no default timeout).
                    timeout=600,
                )
            except requests.exceptions.ConnectionError:
                st.error(f"Connection Error: Could not connect to the Flask API at {BASE_URI}. Please ensure your Flask app is running (e.g., `flask run`).")
                return
            except Exception as e:
                st.exception(f"An unexpected error occurred during the crawl: {e}")
                return

            if result.status_code != 200:
                st.error(f"Exploration failed with status {result.status_code}: {result.text}")
                return

            data = result.json()
            display_map(data.get("link_map", {}))

            st.subheader("π° Accumulated Token Usage (All LLM Calls)")
            token_usage = data.get("token_usage", {"input": 0, "output": 0, "total": 0})
            # Use .get with defaults so a partial payload (missing keys)
            # can't raise KeyError after we already defaulted the dict above.
            st.write(f"**Input Tokens:** {token_usage.get('input', 0)}")
            st.write(f"**Output Tokens:** {token_usage.get('output', 0)}")
            st.write(f"**Total Tokens:** {token_usage.get('total', 0)}")
# Script entry point: run the Streamlit demo when executed directly.
if __name__ == "__main__":
    main()