import splitter as sp
import student_name as sc
import field
import docx2txt
import re
import requests
import gradio as gr
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
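# NOTE: splitter, student_name, and field are assumed to be helper modules
# local to this Space (they are not on PyPI). The interfaces this script
# relies on appear to be:
#   sp.split_string(path)   -> list[str]  # resume text tokenized into words
#   sc.student(text_list)   -> str        # the extracted student name
#   field.branch(text_list) -> str        # 'Data Science' or 'Web Developer'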
def class_resume(path):
    text_list = sp.split_string(path)  # split the resume text into a list of words
    grad_name = sc.student(text_list)  # find the student's name
    department = field.branch(text_list)  # find whether the student is from data science or web development
    doc = docx2txt.process(path)  # full text of the resume
    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all the URLs in the resume
    prof_links = pd.DataFrame(
        {"Professional Links": match[0:3]})  # professional links - GitHub, LinkedIn, and portfolio
    project_match = pd.DataFrame(
        {"Project Links": match[3:]})  # project links (websites and GitHub repository links)
    link_status = []
    req_code = []
    websites = []
    # Check whether each professional link is reachable
    for i in range(len(prof_links["Professional Links"])):
        x = requests.get(prof_links["Professional Links"][i])
        link_status.append(x.reason)
        req_code.append(x.status_code)
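    # NOTE (assumption): requests.get raises (e.g. requests.ConnectionError)
    # for unreachable hosts instead of returning a response, so a hardened
    # version of the loop above might look like:
    #   try:
    #       x = requests.get(url, timeout=10)
    #       link_status.append(x.reason)
    #       req_code.append(x.status_code)
    #   except requests.RequestException:
    #       link_status.append('Unreachable')
    #       req_code.append(0)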
    # Label each professional link by the site it points to
    if department == 'Data Science':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'github.io' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    elif department == 'Web Developer':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'heroku' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    prof_links = pd.DataFrame({'Websites': websites,
                               "Professional Links": match[0:3],
                               "Return Code": req_code,
                               "Link Status": link_status})
    proj_stat = []
    proj_rcode = []
    # Hold the portfolio, GitHub, and LinkedIn URLs once they are identified
    port_url = None
    github_url = None
    linkedin_url = None
    b_status = []
    link_owner = []
    # Check whether each project link is reachable
    for i in range(len(project_match["Project Links"])):
        x = requests.get(project_match["Project Links"][i])
        proj_stat.append(x.reason)
        proj_rcode.append(x.status_code)
    # Pick out the portfolio, GitHub, and LinkedIn URLs from the labelled links
    for i in range(len(prof_links['Professional Links'])):
        if prof_links['Websites'][i] == 'Portfolio':
            port_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Github':
            github_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Linkedin':
            linkedin_url = prof_links['Professional Links'][i]
    if department == 'Web Developer':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                # Assumes the portfolio template renders the owner's name
                # inside an element with the CSS class "name"
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                stu_name = soup.find(class_="name").get_text()
                if stu_name == grad_name:
                    link_owner.append(grad_name)
                else:
                    link_owner.append(stu_name)
                if stu_name in grad_name:
                    b_status.append('Verified')
                else:
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                # Verify the GitHub profile by checking that the portfolio
                # page links back to it
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                # Verify the LinkedIn profile the same way
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
    if department == 'Data Science':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                # A github.io portfolio usually carries the owner's name in
                # its <title>, so match the student's name against it
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                for title in soup.find_all('title'):
                    heading = title.get_text()
                    if grad_name in heading:
                        link_owner.append(grad_name)
                        b_status.append('Verified')
                    else:
                        link_owner.append(heading[25:])
                        b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                # Verify the GitHub profile by checking that the portfolio
                # page links back to it
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                # Verify the LinkedIn profile the same way
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
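    # The repeated "does the portfolio page link back to X?" checks above all
    # share the same shape; a factored-out helper (hypothetical, not part of
    # the original script) could replace them:
    #   def portfolio_links(url):
    #       soup = BeautifulSoup(urlopen(Request(url)), "lxml")
    #       return [a.get('href') for a in soup.find_all('a')]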
    proj_link_owner = []
    proj_link_status = []
    # Verify each project link by checking that the portfolio page links to it
    for i in range(len(project_match['Project Links'])):
        req = Request(port_url)
        html_page = urlopen(req)
        soup = BeautifulSoup(html_page, "lxml")
        links = [link.get('href') for link in soup.find_all("a")]
        if project_match['Project Links'][i] in links:
            proj_link_owner.append(grad_name)
            proj_link_status.append('Verified')
        else:
            proj_link_owner.append('Unknown')
            proj_link_status.append('Check Manually')
    prof_links = pd.DataFrame({'Websites': websites,
                               "Professional Links": match[0:3],
                               "Return Code": req_code,
                               "Link Status": link_status,
                               "Link Owner": link_owner,
                               'Verification Status': b_status})
    project_match = pd.DataFrame({"Project Links": match[3:],
                                  "Request Code": proj_rcode,
                                  "Status": proj_stat,
                                  'Project Owner': proj_link_owner,
                                  'Verification Status': proj_link_status})
    return grad_name, department, prof_links, project_match
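# Example call (assumes a local file 'sample_resume.docx'; the outputs depend
# on the resume's contents):
#   name, dept, prof_df, proj_df = class_resume('sample_resume.docx')
#   print(name, dept)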
# Gradio interface
# (Depending on the Gradio version, the 'file' input may pass a tempfile
# object rather than a path string, in which case class_resume would need
# path.name instead of path.)
iface = gr.Interface(fn=class_resume,
                     inputs=['file'],
                     outputs=['text', 'text', 'dataframe', 'dataframe'],
                     title="Resume Screening")
iface.launch()