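"""Resume screening app.

Extracts the student's name, department, professional links (GitHub, LinkedIn,
portfolio) and project links from a .docx resume, checks that each link
resolves, and tries to verify that the links belong to the candidate before
presenting the results in a Gradio interface.
"""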
# Project-local helpers: word splitting, student-name extraction, and department detection
import splitter as sp
import student_name as sc
import field

import docx2txt
import re
import requests
import gradio as gr
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
def class_resume(path):
    text_list = sp.split_string(path)  # split all the text data of the resume into words
    grad_name = sc.student(text_list)  # find the student's name
    department = field.branch(text_list)  # find whether the student is from data science or web development
    doc = docx2txt.process(path)  # full text of the resume
    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all the URLs in the resume
    prof_links = pd.DataFrame(
        {"Professional Links": match[0:3]})  # professional links - GitHub, LinkedIn, and portfolio
    project_match = pd.DataFrame(
        {"Project Links": match[3:]})  # project links (websites and GitHub repository links)
    link_status = []
    req_code = []
    websites = []
    # Request every professional link to check whether it is reachable
    for i in range(len(prof_links["Professional Links"])):
        x = requests.get(prof_links["Professional Links"][i])
        link_status.append(x.reason)
        req_code.append(x.status_code)
    # Label each professional link; data-science portfolios are hosted on github.io,
    # web-development portfolios on Heroku
    if department == 'Data Science':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'github.io' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    elif department == 'Web Developer':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'heroku' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
                               "Return Code": req_code, "Link Status": link_status})
    proj_stat = []
    proj_rcode = []
    global port_url
    global github_url
    global linkedin_url
    b_status = []
    link_owner = []
    # Request every project link to check whether it is reachable
    for i in range(len(project_match["Project Links"])):
        x = requests.get(project_match["Project Links"][i])
        proj_stat.append(x.reason)
        proj_rcode.append(x.status_code)
    # Pick out the portfolio, GitHub, and LinkedIn URLs from the labelled links
    for i in range(len(prof_links['Professional Links'])):
        if prof_links['Websites'][i] == 'Portfolio':
            port_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Github':
            github_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Linkedin':
            linkedin_url = prof_links['Professional Links'][i]
    # Verify ownership of each professional link: the portfolio page is checked for the
    # candidate's name, while the GitHub and LinkedIn links are verified by looking for
    # them among the anchors on the portfolio page.
    if department == 'Web Developer':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                stu_name = soup.find(class_="name").get_text()
                if stu_name == grad_name:
                    link_owner.append(grad_name)
                else:
                    link_owner.append(stu_name)
                if stu_name in grad_name:
                    b_status.append('Verified')
                else:
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
    elif department == 'Data Science':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                for title in soup.find_all('title'):
                    heading = title.get_text()
                    if grad_name in heading:
                        link_owner.append(grad_name)
                        b_status.append('Verified')
                    else:
                        link_owner.append(heading[25:])  # drop the fixed prefix of the page title
                        b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
    # Verify each project link by checking whether it is linked from the portfolio page
    proj_link_owner = []
    proj_link_status = []
    req = Request(port_url)
    html_page = urlopen(req)
    soup = BeautifulSoup(html_page, "lxml")
    links = [link.get('href') for link in soup.find_all("a")]
    for i in range(len(project_match['Project Links'])):
        if project_match['Project Links'][i] in links:
            proj_link_owner.append(grad_name)
            proj_link_status.append('Verified')
        else:
            proj_link_owner.append('Unknown')
            proj_link_status.append('Check Manually')
    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
                               "Return Code": req_code, "Link Status": link_status,
                               "Link Owner": link_owner, 'Verification Status': b_status})
    project_match = pd.DataFrame(
        {"Project Links": match[3:], "Request Code": proj_rcode, "Status": proj_stat,
         'Project Owner': proj_link_owner, 'Verification Status': proj_link_status})
    return grad_name, department, prof_links, project_match
# Gradio interface
iface = gr.Interface(fn=class_resume, inputs=['file'], outputs=['text', 'text', 'dataframe', 'dataframe'],
                     title="Resume Screening")
iface.launch()
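# Example of calling the pipeline directly instead of through Gradio (assumes the
# helper modules accept a plain file path to a .docx resume):
#   name, dept, prof_df, proj_df = class_resume("sample_resume.docx")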