Spaces
suvo-gh committed
Commit 1366553 • Parent(s): 981a21b
app added
Browse files
- .DS_Store +0 -0
- field.py +9 -0
- hi.txt +0 -0
- requirement.txt +1 -0
- splitter.py +9 -0
- student_name.py +9 -0
- test.py +175 -0
.DS_Store
ADDED
Binary file (6.15 kB)
field.py
ADDED
@@ -0,0 +1,9 @@
+def branch(text_list):
+    department = None  # fallback when no keyword is found
+    for word in text_list:
+        # check each word against the department keywords
+        if word in ('Pandas', 'Neural', 'Sklearn', 'Matplotlib', 'Tensorflow'):
+            department = 'Data Science'
+        elif word == 'Django':
+            department = 'Web Developer'
+    return department
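For reference, a minimal sanity check of branch(); the sample token lists below are hypothetical, not part of the commit:

tokens = ['John', 'Doe', 'Data', 'Science', 'Pandas', 'Sklearn']  # hypothetical resume tokens
print(branch(tokens))                     # -> 'Data Science'
print(branch(['Jane', 'Roe', 'Django']))  # -> 'Web Developer'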
hi.txt
DELETED
File without changes
requirement.txt
ADDED
@@ -0,0 +1 @@
+docx2txt==0.8
splitter.py
ADDED
@@ -0,0 +1,9 @@
+import re
+import string
+import docx2txt
+
+def split_string(path):
+    doc = docx2txt.process(path)  # raw text of the .docx resume
+    # strip punctuation, then split into a list of words
+    res = re.sub('[' + re.escape(string.punctuation) + ']', '', doc).split()
+    return res
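A quick usage sketch, assuming a hypothetical resume.docx on disk and the docx2txt pin from requirement.txt:

words = split_string('resume.docx')  # 'resume.docx' is a hypothetical input file
print(words[:4])  # e.g. ['John', 'Doe', 'Data', 'Science'], punctuation already stripped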
student_name.py
ADDED
@@ -0,0 +1,9 @@
+def student(text_list):
+    # If the token right after a two-word name is a department keyword
+    # ('Data'/'Web' in any casing), the name is the first two words;
+    # otherwise assume a three-word name.
+    if len(text_list) > 2 and text_list[2] in ('Data', 'DATA', 'data', 'Web', 'WEB', 'web'):
+        student_name = text_list[0] + ' ' + text_list[1]
+    else:
+        student_name = text_list[0] + ' ' + text_list[1] + ' ' + text_list[2]
+    return student_name
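A short sketch of the two branches; both token lists are hypothetical:

print(student(['John', 'Doe', 'Data', 'Science']))   # keyword at index 2 -> 'John Doe'
print(student(['John', 'Middle', 'Doe', 'Django']))  # no keyword -> 'John Middle Doe'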
test.py
ADDED
@@ -0,0 +1,175 @@
+import re
+
+import docx2txt
+import gradio as gr
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from urllib.request import Request, urlopen
+
+import field
+import splitter as sp
+import student_name as sc
+
+
+def class_resume(path):
+    text_list = sp.split_string(path)  # split the resume text into words
+    grad_name = sc.student(text_list)  # find the student name
+    department = field.branch(text_list)  # whether the student is from data science or web development
+
+    doc = docx2txt.process(path)  # text data of the resume
+    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all URLs in the resume
+    # the first three URLs are taken as professional links (GitHub, LinkedIn, portfolio)
+    prof_links = pd.DataFrame({"Professional Links": match[0:3]})
+    # the remaining URLs are taken as project links (websites and GitHub repositories)
+    project_match = pd.DataFrame({"Project Links": match[3:]})
+    link_status = []
+    req_code = []
+    websites = []
+    # request each professional link to see whether it is working
+    for i in range(len(prof_links["Professional Links"])):
+        x = requests.get(prof_links["Professional Links"][i])
+        link_status.append(x.reason)
+        req_code.append(x.status_code)
+
+    # label each professional link; portfolios live on github.io for
+    # data-science students and on Heroku for web-development students
+    if department == 'Data Science':
+        for i in range(len(prof_links["Professional Links"])):
+            if "github.com" in prof_links["Professional Links"][i]:
+                websites.append('Github')
+            elif 'github.io' in prof_links["Professional Links"][i]:
+                websites.append('Portfolio')
+            else:
+                websites.append('Linkedin')
+    elif department == 'Web Developer':
+        for i in range(len(prof_links["Professional Links"])):
+            if "github.com" in prof_links["Professional Links"][i]:
+                websites.append('Github')
+            elif 'heroku' in prof_links["Professional Links"][i]:
+                websites.append('Portfolio')
+            else:
+                websites.append('Linkedin')
+
+    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
+                               "Return Code": req_code, "Link Status": link_status})
+    proj_stat = []
+    proj_rcode = []
+    port_url = github_url = linkedin_url = None  # not yet known
+    b_status = []
+    link_owner = []
+    # request each project link to see whether it is working
+    for i in range(len(project_match["Project Links"])):
+        x = requests.get(project_match["Project Links"][i])
+        proj_stat.append(x.reason)
+        proj_rcode.append(x.status_code)
+
+    for i in range(len(prof_links['Professional Links'])):
+        if prof_links['Websites'][i] == 'Portfolio':
+            port_url = prof_links['Professional Links'][i]
+        elif prof_links['Websites'][i] == 'Github':
+            github_url = prof_links['Professional Links'][i]
+        elif prof_links['Websites'][i] == 'Linkedin':
+            linkedin_url = prof_links['Professional Links'][i]
+
+    if department == 'Web Developer':
+        for i in range(len(prof_links['Professional Links'])):
+            if prof_links['Professional Links'][i] == port_url:
+                # the portfolio page is expected to carry the student's name
+                reqs = requests.get(port_url)
+                soup = BeautifulSoup(reqs.content, 'html.parser')
+                stu_name = soup.find(class_="name").get_text()
+                link_owner.append(grad_name if stu_name == grad_name else stu_name)
+                if stu_name in grad_name:
+                    b_status.append('Verified')
+                else:
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == github_url:
+                # ownership check: the portfolio should link to this GitHub profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all("a")]
+                if github_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == linkedin_url:
+                # ownership check: the portfolio should link to this LinkedIn profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all('a')]
+                if linkedin_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+
+    if department == 'Data Science':
+        for i in range(len(prof_links['Professional Links'])):
+            if prof_links['Professional Links'][i] == port_url:
+                # the portfolio <title> is expected to contain the student's name
+                reqs = requests.get(port_url)
+                soup = BeautifulSoup(reqs.content, 'html.parser')
+                heading = ''
+                for title in soup.find_all('title'):
+                    heading = title.get_text()
+                if grad_name in heading:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append(heading[25:])  # rest of the title as the owner
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == github_url:
+                # ownership check: the portfolio should link to this GitHub profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all("a")]
+                if github_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == linkedin_url:
+                # ownership check: the portfolio should link to this LinkedIn profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all('a')]
+                if linkedin_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+
+    # verify project links the same way: each should appear on the portfolio page
+    proj_link_owner = []
+    proj_link_status = []
+    for i in range(len(project_match['Project Links'])):
+        html_page = urlopen(Request(port_url))
+        soup = BeautifulSoup(html_page, "lxml")
+        links = [link.get('href') for link in soup.find_all("a")]
+        if project_match['Project Links'][i] in links:
+            proj_link_owner.append(grad_name)
+            proj_link_status.append('Verified')
+        else:
+            proj_link_owner.append('Unknown')
+            proj_link_status.append('Check Manually')
+
+    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
+                               "Return Code": req_code, "Link Status": link_status,
+                               "Link Owner": link_owner, 'Verification Status': b_status})
+    project_match = pd.DataFrame({"Project Links": match[3:], "Request Code": proj_rcode,
+                                  "Status": proj_stat, 'Project Owner': proj_link_owner,
+                                  'Verification Status': proj_link_status})
+    return grad_name, department, prof_links, project_match
+
+
+# Gradio interface: upload a resume, get back name, department, and the two link tables
+iface = gr.Interface(fn=class_resume, inputs=['file'],
+                     outputs=['text', 'text', "dataframe", "dataframe"],
+                     title="Resume Screening")
+iface.launch()
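Note that class_resume relies on the first three extracted URLs being the professional links, so link order in the resume matters. A small sketch of the URL regex it uses (the sample text is hypothetical):

import re

sample = "GitHub: https://github.com/jdoe Portfolio: https://jdoe.github.io"  # hypothetical resume text
print(re.findall(r"(?P<url>https?://[^\s]+)", sample))
# -> ['https://github.com/jdoe', 'https://jdoe.github.io']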