# Resume-screening app: extracts a student's name and department from a
# .docx resume and verifies the professional/project links it contains.
import splitter as sp
import student_name as sc
import field
import bs4
import docx2txt
import re
import requests
import gradio as gr
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
def class_resume(path):
    """Screen a .docx resume: extract the student's name and department,
    collect every URL in the document, and verify that the professional
    links (GitHub, LinkedIn, portfolio) and the project links belong to
    the student.

    Parameters
    ----------
    path : str
        Filesystem path to the resume (.docx).

    Returns
    -------
    tuple
        (student_name, department, professional-links DataFrame,
        project-links DataFrame).

    Notes
    -----
    Performs live HTTP requests against every link found in the resume.
    """
    text_list = sp.split_string(path)     # split all the text data of resume in words
    grad_name = sc.student(text_list)     # find the student name
    department = field.branch(text_list)  # whether the student is from data science or web development
    doc = docx2txt.process(path)          # text data of resume
    # Every URL that appears in the resume, in document order.
    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)

    # Convention used by these resumes: the first three URLs are the
    # professional links (github, linkedin, portfolio); the rest are
    # project links.  TODO(review): confirm this ordering always holds.
    prof_urls = match[0:3]
    proj_urls = match[3:]

    # Probe each professional link and record its HTTP status.
    link_status = []
    req_code = []
    for url in prof_urls:
        resp = requests.get(url)
        link_status.append(resp.reason)
        req_code.append(resp.status_code)

    # Classify each professional link.  The portfolio host differs by
    # department: data-science students host on github.io, web developers
    # on heroku.  (The original produced an empty list -- and a crash when
    # building the DataFrame below -- for any other department; fall back
    # to the web-developer heuristic instead.)
    portfolio_host = 'github.io' if department == 'Data Science' else 'heroku'
    websites = []
    for url in prof_urls:
        if 'github.com' in url:
            websites.append('Github')
        elif portfolio_host in url:
            websites.append('Portfolio')
        else:
            websites.append('Linkedin')

    # Probe each project link the same way.
    proj_stat = []
    proj_rcode = []
    for url in proj_urls:
        resp = requests.get(url)
        proj_stat.append(resp.reason)
        proj_rcode.append(resp.status_code)

    # Kept global for backward compatibility with the original module
    # interface; initialised to None so a missing link category is
    # detected here instead of raising NameError (or silently reusing a
    # value left over from a previous call).
    global port_url, github_url, linkedin_url
    port_url = github_url = linkedin_url = None
    for site, url in zip(websites, prof_urls):
        if site == 'Portfolio':
            port_url = url
        elif site == 'Github':
            github_url = url
        elif site == 'Linkedin':
            linkedin_url = url

    def _portfolio_hrefs():
        # All anchor hrefs on the portfolio page; the portfolio is
        # expected to link back to the student's other URLs.  Empty when
        # no portfolio link was found in the resume.
        if port_url is None:
            return []
        page = urlopen(Request(port_url))
        soup = BeautifulSoup(page, "lxml")
        return [a.get('href') for a in soup.find_all('a')]

    link_owner = []
    b_status = []
    for url in prof_urls:
        if url == port_url:
            # Verify the portfolio by looking for the student's name on it.
            reqs = requests.get(port_url)
            soup = BeautifulSoup(reqs.content, 'html.parser')
            if department == 'Data Science':
                # Data-science portfolios carry the owner's name in <title>.
                title = soup.find('title')
                heading = title.get_text() if title else ''
                if grad_name in heading:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    # heading[25:] skips a fixed site prefix -- TODO confirm.
                    link_owner.append(heading[25:])
                    b_status.append('Check Manually')
            else:
                # Web-developer portfolios carry the owner in class="name".
                stu_name = soup.find(class_="name").get_text()
                link_owner.append(grad_name if stu_name == grad_name else stu_name)
                b_status.append('Verified' if stu_name in grad_name else 'Check Manually')
        else:
            # GitHub / LinkedIn are verified indirectly: the portfolio page
            # should contain a link back to them.
            if url in _portfolio_hrefs():
                link_owner.append(grad_name)
                b_status.append('verified')
            else:
                link_owner.append('Unknown')
                b_status.append('Check Manually')

    # Each project link is likewise verified by its presence on the
    # portfolio page.  Fetch the portfolio once, not once per link.
    portfolio_links = _portfolio_hrefs()
    proj_link_owner = []
    proj_link_status = []
    for url in proj_urls:
        if url in portfolio_links:
            proj_link_owner.append(grad_name)
            proj_link_status.append('Verified')
        else:
            proj_link_owner.append('Unknown')
            proj_link_status.append('Check Manually')

    prof_links = pd.DataFrame({
        'Websites': websites,
        "Professional Links": prof_urls,
        "return Code": req_code,
        "Link Status": link_status,
        "Link owner": link_owner,
        'Verification Status': b_status,
    })
    project_match = pd.DataFrame({
        "Project Links": proj_urls,
        "Request code": proj_rcode,
        "Status": proj_stat,
        'Project Owner': proj_link_owner,
        'Verification Status': proj_link_status,
    })
    return grad_name, department, prof_links, project_match
# Gradio front-end: upload a resume file, get back the student's name,
# department, and the two link-verification tables.
iface = gr.Interface(
    fn=class_resume,
    inputs=['file'],
    outputs=['text', 'text', "dataframe", "dataframe"],
    title="Resume Screening",
)
iface.launch()