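"""Resume screening app.

Extracts the student's name, department, professional links (GitHub, LinkedIn,
portfolio) and project links from a .docx resume, checks that each link
resolves, and tries to verify that the links belong to the candidate before
presenting the results in a Gradio interface.
"""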
# Project-local helpers: word splitting, student-name extraction, and department detection
import splitter as sp
import student_name as sc
import field

import docx2txt
import re
import requests
import gradio as gr
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
def class_resume(path):
    text_list = sp.split_string(path)  # split all the text data of the resume into words
    grad_name = sc.student(text_list)  # find the student's name
    department = field.branch(text_list)  # find whether the student is from data science or web development
    doc = docx2txt.process(path)  # full text of the resume
    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all the URLs in the resume
    prof_links = pd.DataFrame(
        {"Professional Links": match[0:3]})  # professional links - GitHub, LinkedIn, and portfolio
    project_match = pd.DataFrame(
        {"Project Links": match[3:]})  # project links (websites and GitHub repository links)
    link_status = []
    req_code = []
    websites = []
    # Request every professional link to check whether it is reachable
    for i in range(len(prof_links["Professional Links"])):
        x = requests.get(prof_links["Professional Links"][i])
        link_status.append(x.reason)
        req_code.append(x.status_code)
    # Label each professional link; data-science portfolios are hosted on github.io,
    # web-development portfolios on Heroku
    if department == 'Data Science':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'github.io' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    elif department == 'Web Developer':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'heroku' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
                               "Return Code": req_code, "Link Status": link_status})
    proj_stat = []
    proj_rcode = []
    global port_url
    global github_url
    global linkedin_url
    b_status = []
    link_owner = []
    # Request every project link to check whether it is reachable
    for i in range(len(project_match["Project Links"])):
        x = requests.get(project_match["Project Links"][i])
        proj_stat.append(x.reason)
        proj_rcode.append(x.status_code)
    # Pick out the portfolio, GitHub, and LinkedIn URLs from the labelled links
    for i in range(len(prof_links['Professional Links'])):
        if prof_links['Websites'][i] == 'Portfolio':
            port_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Github':
            github_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Linkedin':
            linkedin_url = prof_links['Professional Links'][i]
    # Verify ownership of each professional link: the portfolio page is checked for the
    # candidate's name, while the GitHub and LinkedIn links are verified by looking for
    # them among the anchors on the portfolio page.
    if department == 'Web Developer':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                stu_name = soup.find(class_="name").get_text()
                if stu_name == grad_name:
                    link_owner.append(grad_name)
                else:
                    link_owner.append(stu_name)
                if stu_name in grad_name:
                    b_status.append('Verified')
                else:
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
    elif department == 'Data Science':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                for title in soup.find_all('title'):
                    heading = title.get_text()
                    if grad_name in heading:
                        link_owner.append(grad_name)
                        b_status.append('Verified')
                    else:
                        link_owner.append(heading[25:])  # drop the fixed prefix of the page title
                        b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
    # Verify each project link by checking whether it is linked from the portfolio page
    proj_link_owner = []
    proj_link_status = []
    req = Request(port_url)
    html_page = urlopen(req)
    soup = BeautifulSoup(html_page, "lxml")
    links = [link.get('href') for link in soup.find_all("a")]
    for i in range(len(project_match['Project Links'])):
        if project_match['Project Links'][i] in links:
            proj_link_owner.append(grad_name)
            proj_link_status.append('Verified')
        else:
            proj_link_owner.append('Unknown')
            proj_link_status.append('Check Manually')
    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
                               "Return Code": req_code, "Link Status": link_status,
                               "Link Owner": link_owner, 'Verification Status': b_status})
    project_match = pd.DataFrame(
        {"Project Links": match[3:], "Request Code": proj_rcode, "Status": proj_stat,
         'Project Owner': proj_link_owner, 'Verification Status': proj_link_status})
    return grad_name, department, prof_links, project_match
# Gradio interface
iface = gr.Interface(fn=class_resume, inputs=['file'], outputs=['text', 'text', 'dataframe', 'dataframe'],
                     title="Resume Screening")
iface.launch()
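# Example of calling the pipeline directly instead of through Gradio (assumes the
# helper modules accept a plain file path to a .docx resume):
#   name, dept, prof_df, proj_df = class_resume("sample_resume.docx")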