import splitter as sp
import student_name as sc
import field
import docx2txt
import re
import requests
import gradio as gr
import pandas as pd
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
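# NOTE: splitter, student_name, and field are assumed to be helper modules
# local to this Space (they are not on PyPI). The interfaces this script
# relies on appear to be:
#   sp.split_string(path)   -> list[str]  # resume text tokenized into words
#   sc.student(text_list)   -> str        # the extracted student name
#   field.branch(text_list) -> str        # 'Data Science' or 'Web Developer'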
def class_resume(path):
    text_list = sp.split_string(path)  # split the resume text into a list of words
    grad_name = sc.student(text_list)  # find the student's name
    department = field.branch(text_list)  # find whether the student is from data science or web development
    doc = docx2txt.process(path)  # full text of the resume
    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all the URLs in the resume
    prof_links = pd.DataFrame(
        {"Professional Links": match[0:3]})  # professional links - GitHub, LinkedIn, and portfolio
    project_match = pd.DataFrame(
        {"Project Links": match[3:]})  # project links (websites and GitHub repository links)
    link_status = []
    req_code = []
    websites = []
    # Check whether each professional link is reachable
    for i in range(len(prof_links["Professional Links"])):
        x = requests.get(prof_links["Professional Links"][i])
        link_status.append(x.reason)
        req_code.append(x.status_code)
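    # NOTE (assumption): requests.get raises (e.g. requests.ConnectionError)
    # for unreachable hosts instead of returning a response, so a hardened
    # version of the loop above might look like:
    #   try:
    #       x = requests.get(url, timeout=10)
    #       link_status.append(x.reason)
    #       req_code.append(x.status_code)
    #   except requests.RequestException:
    #       link_status.append('Unreachable')
    #       req_code.append(0)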
    # Label each professional link by the site it points to
    if department == 'Data Science':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'github.io' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    elif department == 'Web Developer':
        for i in range(len(prof_links["Professional Links"])):
            if "github.com" in prof_links["Professional Links"][i]:
                websites.append('Github')
            elif 'heroku' in prof_links["Professional Links"][i]:
                websites.append('Portfolio')
            else:
                websites.append('Linkedin')
    prof_links = pd.DataFrame({'Websites': websites,
                               "Professional Links": match[0:3],
                               "Return Code": req_code,
                               "Link Status": link_status})
    proj_stat = []
    proj_rcode = []
    # Hold the portfolio, GitHub, and LinkedIn URLs once they are identified
    port_url = None
    github_url = None
    linkedin_url = None
    b_status = []
    link_owner = []
    # Check whether each project link is reachable
    for i in range(len(project_match["Project Links"])):
        x = requests.get(project_match["Project Links"][i])
        proj_stat.append(x.reason)
        proj_rcode.append(x.status_code)
    # Pick out the portfolio, GitHub, and LinkedIn URLs from the labelled links
    for i in range(len(prof_links['Professional Links'])):
        if prof_links['Websites'][i] == 'Portfolio':
            port_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Github':
            github_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Linkedin':
            linkedin_url = prof_links['Professional Links'][i]
    if department == 'Web Developer':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                # Assumes the portfolio template renders the owner's name
                # inside an element with the CSS class "name"
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                stu_name = soup.find(class_="name").get_text()
                if stu_name == grad_name:
                    link_owner.append(grad_name)
                else:
                    link_owner.append(stu_name)
                if stu_name in grad_name:
                    b_status.append('Verified')
                else:
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                # Verify the GitHub profile by checking that the portfolio
                # page links back to it
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                # Verify the LinkedIn profile the same way
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
    if department == 'Data Science':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                # A github.io portfolio usually carries the owner's name in
                # its <title>, so match the student's name against it
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                for title in soup.find_all('title'):
                    heading = title.get_text()
                    if grad_name in heading:
                        link_owner.append(grad_name)
                        b_status.append('Verified')
                    else:
                        link_owner.append(heading[25:])
                        b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                # Verify the GitHub profile by checking that the portfolio
                # page links back to it
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                # Verify the LinkedIn profile the same way
                req = Request(port_url)
                html_page = urlopen(req)
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
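    # The repeated "does the portfolio page link back to X?" checks above all
    # share the same shape; a factored-out helper (hypothetical, not part of
    # the original script) could replace them:
    #   def portfolio_links(url):
    #       soup = BeautifulSoup(urlopen(Request(url)), "lxml")
    #       return [a.get('href') for a in soup.find_all('a')]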
    proj_link_owner = []
    proj_link_status = []
    # Verify each project link by checking that the portfolio page links to it
    for i in range(len(project_match['Project Links'])):
        req = Request(port_url)
        html_page = urlopen(req)
        soup = BeautifulSoup(html_page, "lxml")
        links = [link.get('href') for link in soup.find_all("a")]
        if project_match['Project Links'][i] in links:
            proj_link_owner.append(grad_name)
            proj_link_status.append('Verified')
        else:
            proj_link_owner.append('Unknown')
            proj_link_status.append('Check Manually')
    prof_links = pd.DataFrame({'Websites': websites,
                               "Professional Links": match[0:3],
                               "Return Code": req_code,
                               "Link Status": link_status,
                               "Link Owner": link_owner,
                               'Verification Status': b_status})
    project_match = pd.DataFrame({"Project Links": match[3:],
                                  "Request Code": proj_rcode,
                                  "Status": proj_stat,
                                  'Project Owner': proj_link_owner,
                                  'Verification Status': proj_link_status})
    return grad_name, department, prof_links, project_match
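# Example call (assumes a local file 'sample_resume.docx'; the outputs depend
# on the resume's contents):
#   name, dept, prof_df, proj_df = class_resume('sample_resume.docx')
#   print(name, dept)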
# Gradio interface
# (Depending on the Gradio version, the 'file' input may pass a tempfile
# object rather than a path string, in which case class_resume would need
# path.name instead of path.)
iface = gr.Interface(fn=class_resume,
                     inputs=['file'],
                     outputs=['text', 'text', 'dataframe', 'dataframe'],
                     title="Resume Screening")
iface.launch()