import re
from urllib.request import Request, urlopen

import docx2txt
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup

import splitter as sp      # local module: splits the resume text into words
import student_name as sc  # local module: extracts the student's name
import field               # local module: classifies the student's department


def page_links(url):
    """Fetch a page and return the href of every anchor tag on it."""
    html_page = urlopen(Request(url))
    soup = BeautifulSoup(html_page, "lxml")
    return [link.get("href") for link in soup.find_all("a")]


def class_resume(path):
    text_list = sp.split_string(path)     # split the resume text into words
    grad_name = sc.student(text_list)     # find the student's name
    department = field.branch(text_list)  # 'Data Science' or 'Web Developer'
    doc = docx2txt.process(path)          # raw text of the resume

    # Find every URL in the resume. The first three are assumed to be the
    # professional links (GitHub, LinkedIn, and portfolio); the rest are
    # project links (websites and GitHub repositories).
    match = re.findall(r"https?://[^\s]+", doc)
    prof_urls = match[0:3]
    project_urls = match[3:]

    # Check whether each professional link is reachable.
    link_status = []
    req_code = []
    for url in prof_urls:
        x = requests.get(url)
        link_status.append(x.reason)
        req_code.append(x.status_code)

    # Label each professional link. Data Science students host their
    # portfolio on github.io; Web Developer students host theirs on Heroku.
    portfolio_host = "github.io" if department == "Data Science" else "heroku"
    websites = []
    for url in prof_urls:
        if "github.com" in url:
            websites.append("Github")
        elif portfolio_host in url:
            websites.append("Portfolio")
        else:
            websites.append("Linkedin")

    # Check whether each project link is reachable.
    proj_stat = []
    proj_rcode = []
    for url in project_urls:
        x = requests.get(url)
        proj_stat.append(x.reason)
        proj_rcode.append(x.status_code)

    # Pick out the portfolio URL; it anchors all ownership checks.
    port_url = None
    for url, site in zip(prof_urls, websites):
        if site == "Portfolio":
            port_url = url

    # Verify ownership. The portfolio page is the anchor: the student's name
    # should appear on it, and the GitHub, LinkedIn, and project URLs should
    # all be linked from it.
    portfolio_anchors = page_links(port_url) if port_url else []
    link_owner = []
    b_status = []
    for url, site in zip(prof_urls, websites):
        if site == "Portfolio":
            reqs = requests.get(port_url)
            soup = BeautifulSoup(reqs.content, "html.parser")
            if department == "Web Developer":
                # Web Developer portfolios show the name in an element with
                # class "name".
                stu_name = soup.find(class_="name").get_text()
            else:
                # Data Science portfolios show the name in the page title.
                stu_name = soup.find("title").get_text()
            if stu_name in grad_name or grad_name in stu_name:
                link_owner.append(grad_name)
                b_status.append("Verified")
            else:
                link_owner.append(stu_name)
                b_status.append("Check Manually")
        else:
            # GitHub and LinkedIn are verified if the portfolio links back
            # to them.
            if url in portfolio_anchors:
                link_owner.append(grad_name)
                b_status.append("Verified")
            else:
                link_owner.append("Unknown")
                b_status.append("Check Manually")

    # A project link is verified if it, too, appears on the portfolio page.
    proj_link_owner = []
    proj_link_status = []
    for url in project_urls:
        if url in portfolio_anchors:
            proj_link_owner.append(grad_name)
            proj_link_status.append("Verified")
        else:
            proj_link_owner.append("Unknown")
            proj_link_status.append("Check Manually")

    prof_links = pd.DataFrame({
        "Websites": websites,
        "Professional Links": prof_urls,
        "Return Code": req_code,
        "Link Status": link_status,
        "Link Owner": link_owner,
        "Verification Status": b_status,
    })
    project_match = pd.DataFrame({
        "Project Links": project_urls,
        "Request Code": proj_rcode,
        "Status": proj_stat,
        "Project Owner": proj_link_owner,
        "Verification Status": proj_link_status,
    })
    return grad_name, department, prof_links, project_match


# Gradio interface
iface = gr.Interface(
    fn=class_resume,
    inputs=["file"],
    outputs=["text", "text", "dataframe", "dataframe"],
    title="Resume Screening",
)
iface.launch()
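
# Optional smoke test, assuming a local resume file (the filename below is
# hypothetical): call class_resume directly instead of going through Gradio.
#
#   name, dept, prof_df, proj_df = class_resume("sample_resume.docx")
#   print(name, dept)
#   print(prof_df)
#   print(proj_df)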