"""Scrape the text of every <h3> heading from a list of sites and show the
results in a Streamlit app.

The site list (columns ``SiteName`` and ``SiteURL``) is read from an Excel
workbook whose path is taken from the ``INPUT_DATA`` environment variable.
"""

import os

import bs4
import lxml  # noqa: F401 -- backend parser required by BeautifulSoup(..., "lxml")
import pandas as pd
import requests
import streamlit as st

# Default input path; kept in an env var so deployments can override it.
os.environ["INPUT_DATA"] = "Input/Sites.xlsx"  ## to silence warning


def get_inputData(filePath):
    """Read the sites workbook at *filePath* into a DataFrame.

    Assumes the sheet has ``SiteName`` and ``SiteURL`` columns — TODO confirm
    against Input/Sites.xlsx.
    """
    return pd.read_excel(io=filePath, engine='openpyxl')


def open_url(url, timeout=10):
    """GET *url* and return the ``requests.Response``.

    *timeout* (seconds) is a new keyword with a default, so existing callers
    are unaffected; without it a single unresponsive site would hang the
    whole app indefinitely.
    """
    return requests.get(url, timeout=timeout)


def do_scrape(result):
    """Return the text of every ``<h3>`` element in *result*'s HTML body."""
    soup = bs4.BeautifulSoup(result.text, "lxml")
    return [heading.getText() for heading in soup.select('h3')]


def display_data():
    """Scrape every configured site and render one table per site."""
    filePath = os.environ.get('INPUT_DATA')
    df = get_inputData(filePath)
    for index, row in df.iterrows():
        result = open_url(row['SiteURL'])
        places = do_scrape(result)
        # Build the frame in one shot: pd.concat inside a per-row loop is
        # quadratic and warns when concatenating onto an empty frame.
        pl = pd.DataFrame({
            'Source': [row['SiteName']] * len(places),
            'Places': places,
        })
        st.dataframe(pl)