# TalentLLM / TalentLLM-main / Candidate.py
# AzeezIsh's picture
# Upload 71 files
# 37d3a3b
from typing import Optional
import gdown
import os
from datetime import datetime # Importing the datetime class directly
from gh import getBasicReport
from mathpix import extract_text
from pathlib import Path
class JobCandidate:
    """A single job application built from a flat list of field values.

    Constructing an instance is not a pure operation: ``__init__`` calls
    ``parse_resume`` and ``parse_gh``, which may download a PDF, call the
    project's ``extract_text`` / ``getBasicReport`` helpers, and read or
    write cache files under the current working directory.

    Candidates order by application time (``__lt__``) and compare equal when
    both the timestamp and the e-mail address match.
    """

    def __init__(self, data: list):
        """Populate the candidate from ``data``.

        Expected positions in ``data``:
            0  timestamp string in ``%m/%d/%Y %H:%M:%S`` format
            1  name
            2  e-mail address
            3  resume link (the text after the last ``=`` is used as the
               download id)
            4  cover letter
            5  LinkedIn link
            6  GitHub link or handle
            7  personal website link
            8  visa sponsorship
            9  disability status
            10 ethnic background
            11 gender
            12 military service

        Raises:
            ValueError: if ``data[0]`` does not match the timestamp format.
            IndexError: if ``data`` has fewer than 13 entries.
        """
        self.timestamp = datetime.strptime(data[0], "%m/%d/%Y %H:%M:%S")
        self.name = data[1]
        self.email = data[2]
        self.resume_link = data[3]
        # parse_resume() reads name, email and resume_link, so it must run
        # only after those three are assigned.
        self.resume_text = self.parse_resume()
        self.cover_letter = data[4]
        self.linkedin = data[5]
        self.github_link = data[6]
        # parse_gh() reads github_link, assigned on the line above.
        self.github_text = self.parse_gh()
        self.personal_website_link = data[7]
        self.visa_sponsorship = data[8]
        self.disability_status = data[9]
        self.ethnic_background = data[10]
        self.gender = data[11]
        self.military_service = data[12]

    def __str__(self):
        """Return a multi-line, human-readable summary of the candidate."""
        return (f"Job Candidate: {self.name}\n"
                f"Applied on: {self.timestamp}\n"
                f"Email: {self.email}\n"
                f"Resume {self.resume_text}\n"
                f"Personal Website: {self.personal_website_link}\n"
                f"Visa Sponsorship: {self.visa_sponsorship}\n"
                f"Disability Status: {self.disability_status}\n"
                f"Ethnic Background: {self.ethnic_background}\n"
                f"Gender: {self.gender}\n"
                f"Military Service: {self.military_service}")

    def _anonymize(self, text):
        """Replace the candidate's full name, then first name, with "applicant".

        An empty or missing name is skipped on purpose: ``str.replace("", x)``
        would insert ``x`` between every character of ``text``.
        """
        full_name = (self.name or "").strip()
        if not full_name:
            return text
        first_name = full_name.split()[0]
        # Full name first, so "Jane Doe" collapses to a single "applicant"
        # rather than "applicant Doe".
        return text.replace(full_name, "applicant").replace(first_name, "applicant")

    def parse_resume(self):
        """Return the resume text with the candidate's name anonymised.

        If ``resume_mmds/<email>.pdf.mmd`` exists under the current working
        directory, its contents are used. Otherwise the PDF is downloaded to
        ``resume_pdfs/<email>.pdf`` and passed to ``extract_text``.

        This method never raises for download/extraction problems: on failure
        it returns a message string ("Failed to download the PDF." or the
        text of the caught exception) in place of the resume.

        NOTE(review): nothing in this method writes the ``.pdf.mmd`` cache
        file; presumably ``extract_text`` does -- confirm.
        """
        # The download id is whatever follows the last "=" in the link.
        file_id = self.resume_link.split("=")[-1]

        pdf_dir = os.path.join(os.getcwd(), "resume_pdfs")
        mmd_dir = os.path.join(os.getcwd(), "resume_mmds")
        os.makedirs(pdf_dir, exist_ok=True)
        os.makedirs(mmd_dir, exist_ok=True)

        # NOTE(review): the e-mail address is used verbatim as a file name.
        pdf_path = os.path.join(pdf_dir, f"{self.email}.pdf")
        mmd_path = os.path.join(mmd_dir, f"{self.email}.pdf.mmd")

        try:
            # Reuse previously parsed text when available. It goes through
            # the same anonymisation as freshly extracted text so both paths
            # return consistent output.
            if os.path.exists(mmd_path):
                with open(mmd_path, "r") as cached:
                    return self._anonymize(cached.read())

            gdown.download(id=file_id, quiet=True, use_cookies=False, output=pdf_path)
            if not os.path.exists(pdf_path):
                return "Failed to download the PDF."
            return self._anonymize(extract_text(pdf_path))
        except Exception as e:
            # Deliberate best-effort: one bad resume must not abort
            # construction, so the error text stands in for the resume.
            return str(e)

    @staticmethod
    def _github_username(link):
        """Extract the account name from a GitHub profile link or bare handle.

        Takes the first path segment after ``github.com`` (any scheme or
        ``www.`` prefix is discarded with the host). A value without
        ``github.com`` is treated as a handle. Returns "" when nothing usable
        is found.
        """
        path = (link or "").strip()
        if "github.com" in path:
            path = path.split("github.com", 1)[1]
        segments = [part for part in path.split("/") if part]
        return segments[0] if segments else ""

    def parse_gh(self):
        """Return the GitHub report for the candidate, or "" without a username.

        The report is cached in ``gh_cache/<username>.md`` under the current
        working directory. On a cache miss the string form of
        ``getBasicReport(username)`` is written there and returned.
        """
        username = self._github_username(self.github_link)
        if not username:
            return ""

        cache_file = Path(os.getcwd()) / "gh_cache" / f"{username}.md"
        if cache_file.exists():
            return cache_file.read_text()

        summary = str(getBasicReport(username))
        # The cache directory may not exist yet on a first run.
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        cache_file.write_text(summary)
        return summary

    def parse_portfolio(self):
        """Placeholder for portfolio parsing; currently does nothing."""

    def __lt__(self, other):
        """Order candidates by application timestamp (earlier sorts first)."""
        if not isinstance(other, JobCandidate):
            return NotImplemented
        return self.timestamp < other.timestamp

    def __eq__(self, other):
        """Two candidates are equal when timestamp and e-mail both match."""
        if not isinstance(other, JobCandidate):
            return NotImplemented
        return (self.timestamp, self.email) == (other.timestamp, other.email)

    def __hash__(self):
        """Hash on the same fields ``__eq__`` compares."""
        return hash((self.timestamp, self.email))