import time import numpy as np import pandas as pd import streamlit as st from streamlit_option_menu import option_menu from streamlit_extras.add_vertical_space import add_vertical_space from PyPDF2 import PdfReader from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings.openai import OpenAIEmbeddings from langchain.vectorstores import FAISS from langchain.chat_models import ChatOpenAI from langchain.chains.question_answering import load_qa_chain from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoSuchElementException import warnings warnings.filterwarnings('ignore') def streamlit_config(): # page configuration st.set_page_config(page_title='Resume Analyzer AI', layout="wide") # page header transparent color page_background_color = """ """ st.markdown(page_background_color, unsafe_allow_html=True) # title and position st.markdown(f'

AI-Powered Resume Analyzer and
LinkedIn Scraper with Selenium

', unsafe_allow_html=True) class resume_analyzer: def pdf_to_chunks(pdf): # read pdf and it returns memory address pdf_reader = PdfReader(pdf) # extrat text from each page separately text = "" for page in pdf_reader.pages: text += page.extract_text() # Split the long text into small chunks. text_splitter = RecursiveCharacterTextSplitter( chunk_size=700, chunk_overlap=200, length_function=len) chunks = text_splitter.split_text(text=text) return chunks def openai(openai_api_key, chunks, analyze): # Using OpenAI service for embedding embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) # Facebook AI Similarity Serach library help us to convert text data to numerical vector vectorstores = FAISS.from_texts(chunks, embedding=embeddings) # compares the query and chunks, enabling the selection of the top 'K' most similar chunks based on their similarity scores. docs = vectorstores.similarity_search(query=analyze, k=3) # creates an OpenAI object, using the ChatGPT 3.5 Turbo model llm = ChatOpenAI(model='gpt-3.5-turbo', api_key=openai_api_key) # question-answering (QA) pipeline, making use of the load_qa_chain function chain = load_qa_chain(llm=llm, chain_type='stuff') response = chain.run(input_documents=docs, question=analyze) return response def summary_prompt(query_with_chunks): query = f''' need to detailed summarization of below resume and finally conclude them """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" {query_with_chunks} """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ''' return query def resume_summary(): with st.form(key='Summary'): # User Upload the Resume add_vertical_space(1) pdf = st.file_uploader(label='Upload Your Resume', type='pdf') add_vertical_space(1) # Enter OpenAI API Key col1,col2 = st.columns([0.6,0.4]) with col1: openai_api_key = st.text_input(label='Enter OpenAI API Key', type='password') add_vertical_space(2) # Click on Submit Button submit = st.form_submit_button(label='Submit') add_vertical_space(1) add_vertical_space(3) if submit: if pdf is not None and openai_api_key != '': try: with st.spinner('Processing...'): pdf_chunks = resume_analyzer.pdf_to_chunks(pdf) summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks) summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary_prompt) st.markdown(f'

Summary:

', unsafe_allow_html=True) st.write(summary) except Exception as e: st.markdown(f'
{e}
', unsafe_allow_html=True) elif pdf is None: st.markdown(f'
Please Upload Your Resume
', unsafe_allow_html=True) elif openai_api_key == '': st.markdown(f'
Please Enter OpenAI API Key
', unsafe_allow_html=True) def strength_prompt(query_with_chunks): query = f'''need to detailed analysis and explain of the strength of below resume and finally conclude them """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" {query_with_chunks} """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ''' return query def resume_strength(): with st.form(key='Strength'): # User Upload the Resume add_vertical_space(1) pdf = st.file_uploader(label='Upload Your Resume', type='pdf') add_vertical_space(1) # Enter OpenAI API Key col1,col2 = st.columns([0.6,0.4]) with col1: openai_api_key = st.text_input(label='Enter OpenAI API Key', type='password') add_vertical_space(2) # Click on Submit Button submit = st.form_submit_button(label='Submit') add_vertical_space(1) add_vertical_space(3) if submit: if pdf is not None and openai_api_key != '': try: with st.spinner('Processing...'): pdf_chunks = resume_analyzer.pdf_to_chunks(pdf) summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks) summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary_prompt) strength_prompt = resume_analyzer.strength_prompt(query_with_chunks=summary) strength = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=strength_prompt) st.markdown(f'

Strength:

', unsafe_allow_html=True) st.write(strength) except Exception as e: st.markdown(f'
{e}
', unsafe_allow_html=True) elif pdf is None: st.markdown(f'
Please Upload Your Resume
', unsafe_allow_html=True) elif openai_api_key == '': st.markdown(f'
Please Enter OpenAI API Key
', unsafe_allow_html=True) def weakness_prompt(query_with_chunks): query = f'''need to detailed analysis and explain of the weakness of below resume and how to improve make a better resume. """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" {query_with_chunks} """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ''' return query def resume_weakness(): with st.form(key='Weakness'): # User Upload the Resume add_vertical_space(1) pdf = st.file_uploader(label='Upload Your Resume', type='pdf') add_vertical_space(1) # Enter OpenAI API Key col1,col2 = st.columns([0.6,0.4]) with col1: openai_api_key = st.text_input(label='Enter OpenAI API Key', type='password') add_vertical_space(2) # Click on Submit Button submit = st.form_submit_button(label='Submit') add_vertical_space(1) add_vertical_space(3) if submit: if pdf is not None and openai_api_key != '': try: with st.spinner('Processing...'): pdf_chunks = resume_analyzer.pdf_to_chunks(pdf) summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks) summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary_prompt) weakness_prompt = resume_analyzer.weakness_prompt(query_with_chunks=summary) weakness = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=weakness_prompt) st.markdown(f'

Weakness and Suggestions:

', unsafe_allow_html=True) st.write(weakness) except Exception as e: st.markdown(f'
{e}
', unsafe_allow_html=True) elif pdf is None: st.markdown(f'
Please Upload Your Resume
', unsafe_allow_html=True) elif openai_api_key == '': st.markdown(f'
Please Enter OpenAI API Key
', unsafe_allow_html=True) def job_title_prompt(query_with_chunks): query = f''' what are the job roles i apply to likedin based on below? """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" {query_with_chunks} """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" ''' return query def job_title_suggestion(): with st.form(key='Job Titles'): # User Upload the Resume add_vertical_space(1) pdf = st.file_uploader(label='Upload Your Resume', type='pdf') add_vertical_space(1) # Enter OpenAI API Key col1,col2 = st.columns([0.6,0.4]) with col1: openai_api_key = st.text_input(label='Enter OpenAI API Key', type='password') add_vertical_space(2) # Click on Submit Button submit = st.form_submit_button(label='Submit') add_vertical_space(1) add_vertical_space(3) if submit: if pdf is not None and openai_api_key != '': try: with st.spinner('Processing...'): pdf_chunks = resume_analyzer.pdf_to_chunks(pdf) summary_prompt = resume_analyzer.summary_prompt(query_with_chunks=pdf_chunks) summary = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=summary_prompt) job_title_prompt = resume_analyzer.job_title_prompt(query_with_chunks=summary) job_title = resume_analyzer.openai(openai_api_key=openai_api_key, chunks=pdf_chunks, analyze=job_title_prompt) st.markdown(f'

Job Titles:

', unsafe_allow_html=True) st.write(job_title) except Exception as e: st.markdown(f'
{e}
', unsafe_allow_html=True) elif pdf is None: st.markdown(f'
Please Upload Your Resume
', unsafe_allow_html=True) elif openai_api_key == '': st.markdown(f'
Please Enter OpenAI API Key
', unsafe_allow_html=True) class linkedin_scraper: def webdriver_setup(): options = webdriver.ChromeOptions() options.add_argument('--headless') options.add_argument('--no-sandbox') options.add_argument('--disable-dev-shm-usage') driver = webdriver.Chrome(options=options) driver.maximize_window() return driver def get_userinput(): add_vertical_space(2) with st.form(key='linkedin_scarp'): add_vertical_space(1) col1,col2,col3 = st.columns([0.5,0.3,0.2], gap='medium') with col1: job_title_input = st.text_input(label='Job Title') job_title_input = job_title_input.split(',') with col2: job_location = st.text_input(label='Job Location', value='India') with col3: job_count = st.number_input(label='Job Count', min_value=1, value=1, step=1) # Submit Button add_vertical_space(1) submit = st.form_submit_button(label='Submit') add_vertical_space(1) return job_title_input, job_location, job_count, submit def build_url(job_title, job_location): b = [] for i in job_title: x = i.split() y = '%20'.join(x) b.append(y) job_title = '%2C%20'.join(b) link = f"https://in.linkedin.com/jobs/search?keywords={job_title}&location={job_location}&locationId=&geoId=102713980&f_TPR=r604800&position=1&pageNum=0" return link def open_link(driver, link): while True: # Break the Loop if the Element is Found, Indicating the Page Loaded Correctly try: driver.get(link) driver.implicitly_wait(5) time.sleep(3) driver.find_element(by=By.CSS_SELECTOR, value='span.switcher-tabs__placeholder-text.m-auto') return # Retry Loading the Page except NoSuchElementException: continue def link_open_scrolldown(driver, link, job_count): # Open the Link in LinkedIn linkedin_scraper.open_link(driver, link) # Scroll Down the Page for i in range(0,job_count): # Simulate clicking the Page Up button body = driver.find_element(by=By.TAG_NAME, value='body') body.send_keys(Keys.PAGE_UP) # Scoll down the Page to End driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") driver.implicitly_wait(2) # Click on See More Jobs Button if Present try: x = driver.find_element(by=By.CSS_SELECTOR, value="button[aria-label='See more jobs']").click() driver.implicitly_wait(5) except: pass def job_title_filter(scrap_job_title, user_job_title_input): # User Job Title Convert into Lower Case user_input = [i.lower().strip() for i in user_job_title_input] # scraped Job Title Convert into Lower Case scrap_title = [i.lower().strip() for i in [scrap_job_title]] # Verify Any User Job Title in the scraped Job Title confirmation_count = 0 for i in user_input: if all(j in scrap_title[0] for j in i.split()): confirmation_count += 1 # Return Job Title if confirmation_count greater than 0 else return NaN if confirmation_count > 0: return scrap_job_title else: return np.nan def scrap_company_data(driver, job_title_input, job_location): # scraping the Company Data company = driver.find_elements(by=By.CSS_SELECTOR, value='h4[class="base-search-card__subtitle"]') company_name = [i.text for i in company] location = driver.find_elements(by=By.CSS_SELECTOR, value='span[class="job-search-card__location"]') company_location = [i.text for i in location] title = driver.find_elements(by=By.CSS_SELECTOR, value='h3[class="base-search-card__title"]') job_title = [i.text for i in title] url = driver.find_elements(by=By.XPATH, value='//a[contains(@href, "/jobs/")]') website_url = [i.get_attribute('href') for i in url] # combine the all data to single dataframe df = pd.DataFrame(company_name, columns=['Company Name']) df['Job Title'] = pd.DataFrame(job_title) df['Location'] = pd.DataFrame(company_location) df['Website URL'] = pd.DataFrame(website_url) # Return Job Title if there are more than 1 matched word else return NaN df['Job Title'] = df['Job Title'].apply(lambda x: linkedin_scraper.job_title_filter(x, job_title_input)) # Return Location if User Job Location in Scraped Location else return NaN df['Location'] = df['Location'].apply(lambda x: x if job_location.lower() in x.lower() else np.nan) # Drop Null Values and Reset Index df = df.dropna() df.reset_index(drop=True, inplace=True) return df def scrap_job_description(driver, df, job_count): # Get URL into List website_url = df['Website URL'].tolist() # Scrap the Job Description job_description, description_count = [], 0 for i in range(0, len(website_url)): try: # Open the Link in LinkedIn linkedin_scraper.open_link(driver, website_url[i]) # Click on Show More Button driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click() driver.implicitly_wait(5) time.sleep(1) # Get Job Description description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]') data = [i.text for i in description][0] if len(data.strip()) > 0: job_description.append(data) description_count += 1 else: job_description.append('Description Not Available') # If URL cannot Loading Properly except: job_description.append('Description Not Available') # Check Description Count Meets User Job Count if description_count == job_count: break # Filter the Job Description df = df.iloc[:len(job_description), :] # Add Job Description in Dataframe df['Job Description'] = pd.DataFrame(job_description, columns=['Description']) df['Job Description'] = df['Job Description'].apply(lambda x: np.nan if x=='Description Not Available' else x) df = df.dropna() df.reset_index(drop=True, inplace=True) return df def display_data_userinterface(df_final): # Display the Data in User Interface add_vertical_space(1) if len(df_final) > 0: for i in range(0, len(df_final)): st.markdown(f'

Job Posting Details : {i+1}

', unsafe_allow_html=True) st.write(f"Company Name : {df_final.iloc[i,0]}") st.write(f"Job Title : {df_final.iloc[i,1]}") st.write(f"Location : {df_final.iloc[i,2]}") st.write(f"Website URL : {df_final.iloc[i,3]}") with st.expander(label='Job Desription'): st.write(df_final.iloc[i, 4]) add_vertical_space(3) else: st.markdown(f'
No Matching Jobs Found
', unsafe_allow_html=True) def main(): # Initially set driver to None driver = None try: job_title_input, job_location, job_count, submit = linkedin_scraper.get_userinput() add_vertical_space(2) if submit: if job_title_input != [] and job_location != '': with st.spinner('Chrome Webdriver Setup Initializing...'): driver = linkedin_scraper.webdriver_setup() with st.spinner('Loading More Job Listings...'): # build URL based on User Job Title Input link = linkedin_scraper.build_url(job_title_input, job_location) # Open the Link in LinkedIn and Scroll Down the Page linkedin_scraper.link_open_scrolldown(driver, link, job_count) with st.spinner('scraping Job Details...'): # Scraping the Company Name, Location, Job Title and URL Data df = linkedin_scraper.scrap_company_data(driver, job_title_input, job_location) # Scraping the Job Descriptin Data df_final = linkedin_scraper. scrap_job_description(driver, df, job_count) # Display the Data in User Interface linkedin_scraper.display_data_userinterface(df_final) # If User Click Submit Button and Job Title is Empty elif job_title_input == []: st.markdown(f'
Job Title is Empty
', unsafe_allow_html=True) elif job_location == '': st.markdown(f'
Job Location is Empty
', unsafe_allow_html=True) except Exception as e: add_vertical_space(2) st.markdown(f'
Check Connection! Refresh the Page and Try Again
', unsafe_allow_html=True) finally: if driver: driver.quit() # Streamlit Configuration Setup streamlit_config() add_vertical_space(5) with st.sidebar: add_vertical_space(4) option = option_menu(menu_title='', options=['Summary', 'Strength', 'Weakness', 'Job Titles', 'Linkedin Jobs'], icons=['house-fill', 'database-fill', 'pass-fill', 'list-ul', 'linkedin']) if option == 'Summary': resume_analyzer.resume_summary() elif option == 'Strength': resume_analyzer.resume_strength() elif option == 'Weakness': resume_analyzer.resume_weakness() elif option == 'Job Titles': resume_analyzer.job_title_suggestion() elif option == 'Linkedin Jobs': linkedin_scraper.main()