streaming-visualization / gsheet_loader.py
mbecchis's picture
Update gsheet_loader.py
658a2f6 verified
import time
import gspread
from google.oauth2.service_account import Credentials
import pandas as pd
import streamlit as st
import os
import json
# OAuth scope requested for the service account: read-only Sheets access.
SCOPES = ["https://www.googleapis.com/auth/spreadsheets.readonly"]

# Key of the Google spreadsheet that every tab is loaded from.
sheet_id = "10nGgqXxunGXo_GI1LxybvsAr1TYSDdNiqqZX6DSTbDA"

# Column headers passed as ``expected_headers`` when reading the
# "Catalog Status" tab.
headers = [
    "Catalog",
    "Mapping status",
    "Priority",
    "Program kinds",
    "Customers",
    "Size",
    "Size Aprox",
    "Needed by",
    "Recommendations",
    "Scraping?",
    "Custom provider deeplinks",
    "Scraping link",
]
def get_creds():
    """Build service-account credentials from the GCP_CREDENTIALS env var.

    The variable must hold the service-account key as a JSON document.

    Returns:
        Credentials created by ``Credentials.from_service_account_info``
        with the module-level ``SCOPES``.

    Raises:
        ValueError: If GCP_CREDENTIALS is unset, blank, or not valid JSON.
    """
    raw = os.getenv("GCP_CREDENTIALS")
    # A variable that is set but blank is as unusable as a missing one;
    # report it the same way instead of failing inside json.loads.
    if raw is None or not raw.strip():
        raise ValueError("Missing GCP_CREDENTIALS environment variable.")

    try:
        info = json.loads(raw)
    except json.JSONDecodeError as err:
        # Name the variable in the error, but never echo its (secret) content.
        raise ValueError("GCP_CREDENTIALS is not valid JSON.") from err

    return Credentials.from_service_account_info(info, scopes=SCOPES)
def load_gsheet(
    tab_name: str,
    *,
    max_attempts: int = 3,
    retry_delay: float = 2.0,
) -> pd.DataFrame:
    """Load one tab of the configured spreadsheet into a DataFrame.

    The spreadsheet is identified by the module-level ``sheet_id``. The
    "Catalog Status" tab is read with the module-level ``headers`` passed as
    ``expected_headers``; every other tab is read with gspread's defaults.

    Args:
        tab_name: Title of the worksheet tab to read.
        max_attempts: Total number of tries before giving up (default 3).
        retry_delay: Seconds to wait between tries (default 2.0).

    Returns:
        A DataFrame built from the records of the worksheet.

    Raises:
        ValueError: If ``max_attempts`` is less than 1, or if credentials
            cannot be built (see ``get_creds``).
        gspread.exceptions.APIError: If every attempt fails; the error from
            the last attempt is re-raised after being shown via ``st.error``.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1.")

    client = gspread.authorize(get_creds())
    spreadsheet = None

    for attempt in range(1, max_attempts + 1):
        try:
            # Opening the spreadsheet may itself call the API, so it is done
            # inside the retry loop -- but only until it has succeeded once.
            if spreadsheet is None:
                spreadsheet = client.open_by_key(sheet_id)
            worksheet = spreadsheet.worksheet(tab_name)
            if tab_name == "Catalog Status":
                records = worksheet.get_all_records(expected_headers=headers)
            else:
                records = worksheet.get_all_records()
            return pd.DataFrame(records)
        except gspread.exceptions.APIError as err:
            if attempt == max_attempts:
                st.error(f"Failed to load '{tab_name}': {err}")
                # Bare raise keeps the original traceback intact.
                raise
            st.warning(
                f"Retrying Google API for {tab_name}... ({attempt}/{max_attempts})"
            )
            time.sleep(retry_delay)
def get_data():
    """Load the three catalog tabs and return them as DataFrames.

    Tabs are fetched one after another with a one-second pause between
    consecutive requests (presumably to stay under API rate limits).

    Returns:
        A tuple ``(onboarding, metadata, mapping)`` holding the contents of
        the "Catalog Onboarding", "NEW Catalog Data levels" and
        "Catalog Status" tabs, in that order.
    """
    tab_names = (
        "Catalog Onboarding",
        "NEW Catalog Data levels",
        "Catalog Status",
    )
    frames = []
    for position, tab in enumerate(tab_names):
        # Pause before every request except the first one.
        if position > 0:
            time.sleep(1)
        frames.append(load_gsheet(tab))
    return tuple(frames)