Spaces:

Startup-Exchange
/

TalentLLM

No application file

File size: 3,638 Bytes

37d3a3b

from typing import Optional
import gdown
import os
from datetime import datetime  # Importing the datetime class directly
from gh import getBasicReport
from mathpix import extract_text
from pathlib import Path 
class JobCandidate:
    """One job application, plus text derived from its resume and GitHub links.

    Instances order by application timestamp (see ``__lt__``) and compare
    equal only to themselves.

    NOTE(review): constructing an instance performs file and network I/O,
    because ``__init__`` calls ``parse_resume`` and ``parse_gh``.
    """

    def __init__(self, data: list):
        """Populate the candidate from a positional row of values.

        Args:
            data: Sequence with at least 13 entries, read by index in this
                order: timestamp (``"%m/%d/%Y %H:%M:%S"``), name, email,
                resume link, cover letter, LinkedIn, GitHub link, personal
                website, visa sponsorship, disability status, ethnic
                background, gender, military service.

        Raises:
            ValueError: If ``data[0]`` does not match the timestamp format.
            IndexError: If ``data`` has fewer than 13 entries.
        """
        self.timestamp = datetime.strptime(data[0], "%m/%d/%Y %H:%M:%S")
        self.name = data[1]
        self.email = data[2]
        self.resume_link = data[3]
        # parse_resume reads name, email and resume_link, so it must run
        # only after those three attributes are assigned.
        self.resume_text = self.parse_resume()
        self.cover_letter = data[4]
        self.linkedin = data[5]
        self.github_link = data[6]
        # parse_gh reads github_link, assigned on the line above.
        self.github_text = self.parse_gh()
        self.personal_website_link = data[7]
        self.visa_sponsorship = data[8]
        self.disability_status = data[9]
        self.ethnic_background = data[10]
        self.gender = data[11]
        self.military_service = data[12]

    def __str__(self):
        """Return a multi-line, human-readable summary of the candidate."""
        return (f"Job Candidate: {self.name}\n"
                f"Applied on: {self.timestamp}\n"
                f"Email: {self.email}\n"
                f"Resume {self.resume_text}\n"
                f"Personal Website: {self.personal_website_link}\n"
                f"Visa Sponsorship: {self.visa_sponsorship}\n"
                f"Disability Status: {self.disability_status}\n"
                f"Ethnic Background: {self.ethnic_background}\n"
                f"Gender: {self.gender}\n"
                f"Military Service: {self.military_service}")

    def _anonymize(self, text):
        """Replace the applicant's full name, then first name, with "applicant".

        Blank names are skipped: ``str.replace("", ...)`` would otherwise
        insert the replacement between every character of ``text``.
        """
        full_name = (self.name or "").strip()
        if not full_name:
            return text
        text = text.replace(full_name, "applicant")
        first_name = full_name.split()[0]
        return text.replace(first_name, "applicant")

    def parse_resume(self):
        """Return the resume text with the applicant's name masked.

        Looks for ``resume_mmds/<email>.pdf.mmd`` under the current working
        directory first. If it is missing, the PDF is downloaded with
        ``gdown`` to ``resume_pdfs/<email>.pdf`` and passed to
        ``extract_text``. Both cached and freshly extracted text go through
        the same name masking, so repeated runs return the same result.

        Returns:
            The masked resume text. On failure this is best-effort: either
            ``"Failed to download the PDF."`` or the message of whatever
            exception was raised, instead of propagating the error.
        """
        # The file id is taken to be whatever follows the last "=" in the link.
        file_id = self.resume_link.split('=')[-1]
        pdf_dir = os.path.join(os.getcwd(), "resume_pdfs")
        mmd_dir = os.path.join(os.getcwd(), "resume_mmds")

        # Ensure the directories exist
        os.makedirs(pdf_dir, exist_ok=True)
        os.makedirs(mmd_dir, exist_ok=True)

        pdf_path = os.path.join(pdf_dir, f"{self.email}.pdf")
        mmd_path = os.path.join(mmd_dir, f"{self.email}.pdf.mmd")

        try:
            # Reuse previously parsed text when it is already on disk.
            # NOTE(review): nothing in this class writes mmd_path; presumably
            # extract_text does - confirm.
            if os.path.exists(mmd_path):
                with open(mmd_path, "r") as f:
                    return self._anonymize(f.read())

            # Download the PDF
            gdown.download(id=file_id, quiet=True, use_cookies=False, output=pdf_path)

            # Check if the download was successful
            if not os.path.exists(pdf_path):
                return "Failed to download the PDF."
            return self._anonymize(extract_text(pdf_path))
        except Exception as e:
            # Deliberate best-effort: the error text stands in for the resume.
            return str(e)

    @staticmethod
    def _github_username(link):
        """Extract the account name from a GitHub profile/repo link or bare handle.

        Accepts forms such as ``https://github.com/user``,
        ``github.com/user/``, ``www.github.com/user/repo`` and ``user``.
        Returns ``""`` when no name can be found.
        """
        if not link:
            return ""
        rest = link.strip()
        if "://" in rest:
            rest = rest.split("://", 1)[1]
        if rest.lower().startswith("www."):
            rest = rest[len("www."):]
        if rest.lower().startswith("github.com"):
            rest = rest[len("github.com"):]
        # Drop any query string or fragment before looking at path segments.
        for separator in ("?", "#"):
            rest = rest.split(separator, 1)[0]
        segments = [segment for segment in rest.split("/") if segment]
        if not segments:
            return ""
        # The first path segment is the account; later ones are repo paths.
        return segments[0].lstrip("@")

    def parse_gh(self):
        """Return the GitHub report for the candidate, cached on disk.

        The report is stored at ``gh_cache/<username>.md`` under the current
        working directory. On a cache miss it is produced by
        ``getBasicReport(username)``, converted with ``str`` and written to
        the cache (creating ``gh_cache`` if needed).

        Returns:
            The report text, or ``""`` when no username can be extracted
            from ``github_link``.
        """
        username = self._github_username(self.github_link)
        if not username:
            return ""

        cache_file = Path(os.getcwd()) / "gh_cache" / f"{username}.md"
        if cache_file.exists():
            return cache_file.read_text()

        summary = str(getBasicReport(username))
        # Write the summary to the file, creating the cache directory first
        # so the initial lookup does not fail on a missing folder.
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        cache_file.write_text(summary)
        return summary

    def parse_portfolio(self):
        """Placeholder; not implemented yet and returns ``None``."""
        pass

    def __lt__(self, other):
        """Order candidates by application timestamp, earliest first."""
        if not isinstance(other, JobCandidate):
            return NotImplemented
        return self.timestamp < other.timestamp

    def __eq__(self, other):
        """Compare by identity: a candidate equals only itself.

        Two distinct applications stay unequal even when their timestamps
        match, so tuple comparisons still fall through to ``__lt__``.
        """
        if not isinstance(other, JobCandidate):
            return NotImplemented
        return self is other

    # Defining __eq__ clears the inherited hash; restore the identity hash so
    # instances can be used in sets and as dict keys.
    __hash__ = object.__hash__