gopiashokan committed on
Commit
54bf828
1 Parent(s): 406d206

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +10 -17
  2. requirements.txt +1 -2
app.py CHANGED
@@ -7,12 +7,11 @@ from streamlit_extras.add_vertical_space import add_vertical_space
7
  from PyPDF2 import PdfReader
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain.embeddings.openai import OpenAIEmbeddings
10
- from langchain_community.vectorstores import FAISS
11
- from langchain_community.chat_models import ChatOpenAI
12
  from langchain.chains.question_answering import load_qa_chain
13
  from selenium import webdriver
14
  from selenium.webdriver.common.by import By
15
- from selenium.common.exceptions import NoSuchElementException
16
  import warnings
17
  warnings.filterwarnings('ignore')
18
 
@@ -244,43 +243,37 @@ class linkedin_scraper:
244
  website_url = df['Website URL'].tolist()
245
 
246
  # Scrap the Job Description
247
- job_description = []
248
  for i in range(0, len(website_url)):
249
  try:
250
  # Open the URL
251
  driver.get(website_url[i])
252
- driver.implicitly_wait(10)
253
- time.sleep(1)
254
-
255
- # Click on Show More Button
256
- driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
257
- driver.implicitly_wait(10)
258
  time.sleep(1)
259
 
260
  # Click on Show More Button
261
  driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
262
- driver.implicitly_wait(10)
263
  time.sleep(1)
264
 
265
  # Get Job Description
266
  description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]')
267
- driver.implicitly_wait(10)
268
  data = [i.text for i in description][0]
269
 
270
  if len(data.strip()) > 0:
271
  job_description.append(data)
 
272
  else:
273
  job_description.append('Description Not Available')
274
-
275
- # Check Description Count Meets User Job Count
276
- if len([i for i in job_description if i != 'Description Not Available']) >= job_count:
277
- break
278
 
279
  # If URL cannot Loading Properly
280
  except:
281
  job_description.append('Description Not Available')
 
 
 
 
282
 
283
-
284
  # Filter the Job Description
285
  df = df.iloc[:len(job_description), :]
286
 
 
7
  from PyPDF2 import PdfReader
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  from langchain.embeddings.openai import OpenAIEmbeddings
10
+ from langchain.vectorstores import FAISS
11
+ from langchain.chat_models import ChatOpenAI
12
  from langchain.chains.question_answering import load_qa_chain
13
  from selenium import webdriver
14
  from selenium.webdriver.common.by import By
 
15
  import warnings
16
  warnings.filterwarnings('ignore')
17
 
 
243
  website_url = df['Website URL'].tolist()
244
 
245
  # Scrap the Job Description
246
+ job_description, description_count = [], 0
247
  for i in range(0, len(website_url)):
248
  try:
249
  # Open the URL
250
  driver.get(website_url[i])
251
+ driver.implicitly_wait(5)
 
 
 
 
 
252
  time.sleep(1)
253
 
254
  # Click on Show More Button
255
  driver.find_element(by=By.CSS_SELECTOR, value='button[data-tracking-control-name="public_jobs_show-more-html-btn"]').click()
256
+ driver.implicitly_wait(5)
257
  time.sleep(1)
258
 
259
  # Get Job Description
260
  description = driver.find_elements(by=By.CSS_SELECTOR, value='div[class="show-more-less-html__markup relative overflow-hidden"]')
 
261
  data = [i.text for i in description][0]
262
 
263
  if len(data.strip()) > 0:
264
  job_description.append(data)
265
+ description_count += 1
266
  else:
267
  job_description.append('Description Not Available')
 
 
 
 
268
 
269
  # If URL cannot Loading Properly
270
  except:
271
  job_description.append('Description Not Available')
272
+
273
+ # Check Description Count Meets User Job Count
274
+ if description_count == job_count:
275
+ break
276
 
 
277
  # Filter the Job Description
278
  df = df.iloc[:len(job_description), :]
279
 
requirements.txt CHANGED
@@ -5,8 +5,7 @@ streamlit_option_menu
5
  streamlit_extras
6
  PyPDF2
7
  langchain
8
- langchain-community
9
  openai
10
  tiktoken
11
  faiss-cpu
12
- selenium >=4.0.0, < 5.0.0
 
5
  streamlit_extras
6
  PyPDF2
7
  langchain
 
8
  openai
9
  tiktoken
10
  faiss-cpu
11
+ selenium