Spaces:
Sleeping
Sleeping
Update Workflow/Scrape.py
Browse files- Workflow/Scrape.py +14 -7
Workflow/Scrape.py
CHANGED
@@ -1,18 +1,25 @@
|
|
1 |
import requests
|
2 |
import bs4
|
3 |
import lxml
|
|
|
|
|
4 |
|
5 |
-
def
|
6 |
-
|
|
|
7 |
|
8 |
-
def
|
9 |
-
places=[]
|
10 |
result=requests.get("https://www.bmcadventures.com/collections/one-day-treks")
|
11 |
-
soup = bs4.BeautifulSoup(result.text, "lxml")
|
12 |
-
|
13 |
-
places = soup.select('h3')
|
14 |
|
|
|
|
|
|
|
15 |
return places
|
16 |
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
|
|
|
1 |
import requests
|
2 |
import bs4
|
3 |
import lxml
|
4 |
+
import pandas as pd
|
5 |
+
import os
|
6 |
|
7 |
+
def get_inputData(filePath):
    """Load the input spreadsheet into a pandas DataFrame.

    Args:
        filePath: Location of the Excel workbook; it is passed unchanged to
            ``pandas.read_excel``.

    Returns:
        The DataFrame produced by ``pandas.read_excel(filePath)``.

    Raises:
        ValueError: If ``filePath`` is ``None`` (for example because the
            caller read it from an environment variable that is not set).
    """
    if filePath is None:
        # Fail here with an actionable message rather than deep inside pandas.
        raise ValueError("filePath is None; no input spreadsheet was provided")
    return pd.read_excel(filePath)
|
10 |
|
11 |
+
def open_url(url):
    """Fetch *url* with an HTTP GET and return the response.

    Args:
        url: Address to request. A falsy value (``None`` or ``""``) falls
            back to the one-day-treks collection page, which is the address
            this function previously requested regardless of its argument.

    Returns:
        The response object returned by ``requests.get``.
    """
    if not url:
        url = "https://www.bmcadventures.com/collections/one-day-treks"
    # Without an explicit timeout a stalled server would block the caller
    # indefinitely.
    result = requests.get(url, timeout=30)
    # Hand the response back; previously it was bound to a local and lost.
    return result
|
|
|
|
|
|
|
13 |
|
14 |
+
def do_scrape(result=None):
    """Return the text of every ``<h3>`` element found in the page HTML.

    Args:
        result: Optional response-like object exposing the page HTML through
            a ``.text`` attribute. When omitted, the one-day-treks collection
            page is requested here, because no ``result`` name is otherwise
            available inside this function.

    Returns:
        A list of strings, one per ``<h3>`` element matched by the selector.
    """
    if result is None:
        result = requests.get(
            "https://www.bmcadventures.com/collections/one-day-treks",
            timeout=30,
        )
    soup = bs4.BeautifulSoup(result.text, "lxml")
    # select() yields a list of tags, so the text must be read per tag
    # instead of calling getText() on the list itself.
    places = [heading.getText() for heading in soup.select('h3')]
    return places
|
18 |
|
19 |
+
def display_data():
    """Load the spreadsheet named by ``INPUT_DATA`` and pass it to ``st.dataframe``.

    The path is read from the ``INPUT_DATA`` environment variable and loaded
    with ``get_inputData``.

    Raises:
        ValueError: If the ``INPUT_DATA`` environment variable is not set.
    """
    filePath = os.environ.get('INPUT_DATA')
    if filePath is None:
        # os.environ.get returns None for a missing variable; report that
        # directly instead of forwarding None to the loader.
        raise ValueError("INPUT_DATA environment variable is not set")
    df = get_inputData(filePath)
    # NOTE(review): `st` is not imported anywhere in the visible file;
    # presumably `import streamlit as st` is intended -- confirm and add it
    # to the module imports, otherwise this line raises NameError.
    st.dataframe(df)
|
23 |
+
|
24 |
|
25 |
|