import streamlit as st
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time
from urllib.parse import quote_plus
# Global DataFrame that accumulates one row per scraped job posting
df = pd.DataFrame(columns=["Title", "Location", "Company", "Link", "Description"])
# Get user input
inputJobTitle = st.text_input("Enter Job Title:")
inputJobLocation = st.text_input("Enter Job Location:")
totalPages = st.number_input("Enter Total Pages:", min_value=1, value=1)
submit_button = st.button("Submit")
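# Note: with Selenium 4.6+ the bare webdriver.Chrome() calls below resolve a
# matching chromedriver automatically (Selenium Manager); on older releases
# the driver binary must already be on PATH.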
def scrapeJobDescription(url):
    # Fetch a single job-detail page in its own headless browser and return
    # the description text, or an empty string if the page has none.
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    driver.get(url)
    html = driver.page_source
    driver.quit()
    soup = BeautifulSoup(html, "html.parser")
    try:
        jobDescription = soup.find(
            "div", class_="show-more-less-html__markup"
        ).text.strip()
        return jobDescription
    except AttributeError:
        # soup.find() returned None: the description block is missing
        # (e.g. LinkedIn served an auth wall instead of the posting).
        return ""
def scrapeLinkedin():
    global df
    counter = 0
    pageCounter = 1
    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    while pageCounter <= totalPages:
        try:
            # quote_plus() makes multi-word titles/locations URL-safe
            driver.get(
                "https://www.linkedin.com/jobs/search/"
                f"?&keywords={quote_plus(inputJobTitle)}"
                f"&location={quote_plus(inputJobLocation)}"
                f"&refresh=true&start={counter}"
            )
            html = driver.page_source
            soup = BeautifulSoup(html, "html.parser")
            ulElement = soup.find("ul", class_="jobs-search__results-list")
            liElements = ulElement.find_all("li")
            for item in liElements:
                jobTitle = item.find(
                    "h3", class_="base-search-card__title"
                ).text.strip()
                jobLocation = item.find(
                    "span", class_="job-search-card__location"
                ).text.strip()
                jobCompany = item.find(
                    "h4", class_="base-search-card__subtitle"
                ).text.strip()
                jobLink = item.find_all("a")[0]["href"]
                jobDescription = scrapeJobDescription(jobLink)
                if jobTitle and jobLocation and jobCompany and jobLink:
                    df = pd.concat(
                        [
                            df,
                            pd.DataFrame(
                                {
                                    "Title": [jobTitle],
                                    "Location": [jobLocation],
                                    "Company": [jobCompany],
                                    "Link": [jobLink],
                                    "Description": [jobDescription],
                                }
                            ),
                        ],
                        ignore_index=True,
                    )
            counter += 25
            pageCounter += 1
        except Exception:
            # A missing results list usually means LinkedIn stopped serving
            # guest results (rate limit or end of listings): stop paginating.
            break
    driver.quit()
def convert_df(df):
    # Streamlit's download button takes bytes, so serialize the CSV as UTF-8.
    return df.to_csv(index=False).encode("utf-8")
if submit_button:
    with st.spinner("Operation in progress. Please wait..."):
        scrapeLinkedin()
        time.sleep(1)
    st.write(df)
    csv = convert_df(df)
    st.download_button(
        "Press to Download",
        csv,
        "file.csv",
        "text/csv",
        key="download-csv",
    )
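# Minimal usage sketch, assuming this file is saved as app.py:
#   streamlit run app.py
# Then open the local URL Streamlit prints, fill in the job title, location,
# and page count, and press Submit to scrape and download the CSV.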