# jobs-scraper / app.py (by MuhFaridanSutariya)
# A Streamlit app that scrapes LinkedIn job search results with Selenium.

import time
from urllib.parse import quote_plus  # added: search terms must be URL-encoded

import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
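

# The same headless-Chrome setup is repeated in both scraping functions below.
# A small factory would remove the duplication; this is a sketch with a
# hypothetical name (make_headless_driver), not something the original defines:
def make_headless_driver():
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    return webdriver.Chrome(options=options)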

# Accumulator for scraped rows (note: rebuilt on every Streamlit rerun)
df = pd.DataFrame(columns=["Title", "Location", "Company", "Link", "Description"])

# Collect the search parameters from the user
inputJobTitle = st.text_input("Enter Job Title:")
inputJobLocation = st.text_input("Enter Job Location:")
totalPages = st.number_input("Enter Total Pages:", min_value=1, value=1)
submit_button = st.button("Submit")

def scrapeJobDescription(url):
    """Open a job posting in headless Chrome and return its description text."""
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        soup = BeautifulSoup(driver.page_source, "html.parser")
        # Guest job pages render the description inside this div
        return soup.find(
            "div", class_="show-more-less-html__markup"
        ).text.strip()
    except Exception:
        # Description block missing (find() returned None) or page failed to load
        return ""
    finally:
        driver.quit()  # Release the browser even when scraping fails

def scrapeLinkedin():
    global df  # the results accumulator defined at module level
    counter = 0
    pageCounter = 1
    options = Options()
    options.add_argument("--window-size=1920,1080")
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    while pageCounter <= totalPages:
        try:
            # LinkedIn's guest search paginates in steps of 25 results
            driver.get(
                "https://www.linkedin.com/jobs/search/"
                f"?&keywords={quote_plus(inputJobTitle)}"
                f"&location={quote_plus(inputJobLocation)}"
                f"&refresh=true&start={counter}"
            )
            soup = BeautifulSoup(driver.page_source, "html.parser")
            ulElement = soup.find("ul", class_="jobs-search__results-list")
            liElements = ulElement.find_all("li")
            for item in liElements:
                jobTitle = item.find(
                    "h3", class_="base-search-card__title"
                ).text.strip()
                jobLocation = item.find(
                    "span", class_="job-search-card__location"
                ).text.strip()
                jobCompany = item.find(
                    "h4", class_="base-search-card__subtitle"
                ).text.strip()
                jobLink = item.find_all("a")[0]["href"]
                jobDescription = scrapeJobDescription(jobLink)
                if jobTitle and jobLocation and jobCompany and jobLink:
                    df = pd.concat(
                        [
                            df,
                            pd.DataFrame(
                                {
                                    "Title": [jobTitle],
                                    "Location": [jobLocation],
                                    "Company": [jobCompany],
                                    "Link": [jobLink],
                                    "Description": [jobDescription],
                                }
                            ),
                        ],
                        ignore_index=True,
                    )
            counter += 25
            pageCounter += 1
        except Exception:
            # Results list missing or a card failed to parse; stop paging
            break
    driver.quit()
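
# Note: because `df` lives at module level, it is rebuilt on every Streamlit
# rerun, so scraped results vanish when any widget changes. If persistence
# across reruns is wanted, st.session_state is the idiomatic store; a minimal
# sketch (the "results" key is assumed, not in the original):
#
#     if "results" not in st.session_state:
#         st.session_state["results"] = df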

def convert_df(df):
    # Serialize the results as UTF-8 CSV bytes for st.download_button
    return df.to_csv(index=False).encode("utf-8")

if submit_button:
    with st.spinner("Operation in progress. Please wait..."):
        scrapeLinkedin()
        time.sleep(1)  # brief pause before rendering results
        st.write(df)
        csv = convert_df(df)
        st.download_button(
            "Press to Download",
            csv,
            "file.csv",
            "text/csv",
            key="download-csv",
        )
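
# To run locally (assumes Chrome is installed; Selenium 4.6+ can fetch a
# matching chromedriver automatically via Selenium Manager):
#   pip install streamlit selenium beautifulsoup4 pandas
#   streamlit run app.py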