# SR05's picture
# Create fetch_data()
# 14adba0 verified
import requests
import pandas as pd
import streamlit as st
from io import BytesIO
from bs4 import BeautifulSoup
def _find_decisions_link(soup, page_url, link_text):
    """Return the absolute URL of the first usable anchor containing *link_text*.

    Anchors that match the text but carry no ``href`` are skipped instead of
    crashing on ``None.startswith``. Returns ``None`` when nothing matches.
    """
    from urllib.parse import urljoin

    for anchor in soup.find_all("a"):
        href = anchor.get("href")
        if href and link_text in anchor.get_text():
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the page, so no "startswith('http')" test is needed.
            return urljoin(page_url, href)
    return None


def _extract_decisions(raw):
    """Reduce the raw sheet to a two-column decisions table.

    Rows that are entirely empty are dropped, the first row whose first cell
    contains "Application Number" is treated as the header, and everything
    below it is returned with the columns named "Application Number" and
    "Decision". Returns ``None`` if the sheet has fewer than two columns or
    no header row can be located.
    """
    table = raw.dropna(how="all").reset_index(drop=True)
    if table.shape[1] < 2:
        return None

    is_header = table.iloc[:, 0].astype(str).str.contains(
        "Application Number", na=False, regex=False
    )
    if not is_header.any():
        return None
    header_pos = int(is_header.to_numpy().argmax())  # position of first match

    # Only the first two columns are kept so that stray extra columns in the
    # sheet do not make the column rename below raise a length mismatch.
    decisions = table.iloc[header_pos + 1:, :2].reset_index(drop=True)
    decisions.columns = ["Application Number", "Decision"]
    decisions = decisions.dropna()
    decisions["Application Number"] = (
        decisions["Application Number"].astype(str).str.strip()
    )
    return decisions


@st.cache_data(ttl=3600)
def fetch_data():
    """Download and parse the visa decisions spreadsheet.

    The processing-times page is fetched, the link whose text contains
    "Visa decisions made from 1 January 2025" is located, and the linked
    ODS file is downloaded and read with pandas. Progress and errors are
    reported through Streamlit.

    Returns:
        ``(df, "Visa Decisions Report")`` on success, where ``df`` has the
        columns "Application Number" (stripped strings) and "Decision";
        ``(None, None)`` on any failure.

    NOTE(review): the decorator caches the return value for 3600 seconds,
    which includes a ``(None, None)`` failure result. Callers that want an
    immediate retry after a failure need to clear the cache themselves.
    """
    page_url = "https://www.ireland.ie/en/india/newdelhi/services/visas/processing-times-and-decisions/"
    link_text = "Visa decisions made from 1 January 2025"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
    }

    st.write("🔄 Fetching webpage...")
    try:
        response = requests.get(page_url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"❌ Failed to fetch webpage: {e}")
        return None, None

    st.write("🔄 Parsing webpage...")
    soup = BeautifulSoup(response.content, "html.parser")
    file_url = _find_decisions_link(soup, page_url, link_text)
    if not file_url:
        st.error("❌ Could not find the visa decisions file link.")
        return None, None
    st.write(f"📥 Found file link: {file_url}")

    st.write("🔄 Downloading file...")
    try:
        ods_response = requests.get(file_url, headers=headers, timeout=15)
        ods_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"❌ Failed to download the file: {e}")
        return None, None

    st.write("📂 Processing file...")
    try:
        # header=None keeps every sheet row as data, so the real header row
        # can be found by content even when it is the very first row.
        raw = pd.read_excel(
            BytesIO(ods_response.content), engine="odf", header=None
        )
    except Exception as e:  # any reader failure is surfaced to the user
        st.error(f"❌ Error reading ODS file: {e}")
        return None, None

    df = _extract_decisions(raw)
    if df is None:
        st.error("❌ Could not find the header row. Check the file format.")
        return None, None

    st.write("✅ Data loaded successfully!")
    return df, "Visa Decisions Report"
# Runs when the script executes: load the decisions table and its display
# name via fetch_data(). Both names are None when fetch_data() reported a
# failure, so later code must check before using them.
precomputed_df, file_name = fetch_data()