suvo-gh committed on
Commit
1366553
1 Parent(s): 981a21b
Files changed (7)
  1. .DS_Store +0 -0
  2. field.py +7 -0
  3. hi.txt +0 -0
  4. requirement.txt +1 -0
  5. splitter.py +8 -0
  6. student_name.py +7 -0
  7. test.py +223 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
field.py ADDED
@@ -0,0 +1,7 @@
+ def branch(text_list):
+     # Infer the department from framework keywords found among the resume's words
+     for word in text_list:
+         if word in ('Pandas', 'Neural', 'Sklearn', 'Matplotlib', 'Tensorflow'):
+             return 'Data Science'
+         elif word == 'Django':
+             return 'Web Developer'
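A minimal usage sketch for branch(), assuming the word list produced by splitter.split_string(); the sample tokens below are hypothetical, not taken from a real resume:

    import field

    tokens = ['John', 'Doe', 'Data', 'Science', 'Pandas', 'Tensorflow']
    print(field.branch(tokens))  # -> 'Data Science' ('Pandas' is one of the data-science keywords)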
hi.txt DELETED
File without changes
requirement.txt ADDED
@@ -0,0 +1 @@
+ docx2txt==0.8
splitter.py ADDED
@@ -0,0 +1,8 @@
+ import docx2txt
+ import re
+ import string
+
+ def split_string(path):
+     # Extract the text of the .docx resume, strip punctuation, and split it into words
+     doc = docx2txt.process(path)
+     return re.sub('[' + re.escape(string.punctuation) + ']', '', doc).split()
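A minimal sketch of calling split_string(); 'resume.docx' is a placeholder path, not a file in this commit:

    import splitter

    words = splitter.split_string('resume.docx')  # hypothetical .docx resume
    print(words[:5])  # first few words, with punctuation stripped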
student_name.py ADDED
@@ -0,0 +1,7 @@
+ def student(text_list):
+     # If the third word starts the field heading ('Data ...'/'Web ...'), the name is two words
+     if text_list[2].lower() in ('data', 'web'):
+         student_name = text_list[0] + ' ' + text_list[1]
+     else:
+         student_name = text_list[0] + ' ' + text_list[1] + ' ' + text_list[2]
+     return student_name
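For context, a sketch of the helper pipeline that test.py relies on; the path is again a placeholder:

    import splitter as sp
    import student_name as sc
    import field

    tokens = sp.split_string('resume.docx')               # hypothetical path
    print(sc.student(tokens), '|', field.branch(tokens))  # e.g. 'John Doe | Data Science'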
test.py ADDED
@@ -0,0 +1,223 @@
+ import splitter as sp
+ import student_name as sc
+ import field
+ import bs4
+ import docx2txt
+ import re
+ import requests
+ import gradio as gr
+ import pandas as pd
+ from bs4 import BeautifulSoup
+ from urllib.request import Request, urlopen
+
+
+ def class_resume(path):
+     text_list = sp.split_string(path)  # split the resume text into individual words
+     grad_name = sc.student(text_list)  # find the student's name
+     department = field.branch(text_list)  # whether the student is from Data Science or Web Development
+
+     doc = docx2txt.process(path)  # raw text of the resume
+     match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all the URLs in the resume
+     prof_links = pd.DataFrame(
+         {"Professional Links": match[0:3]})  # professional links - GitHub, LinkedIn, and portfolio
+     project_match = pd.DataFrame(
+         {"Project Links": match[3:]})  # project links (websites and GitHub repository links)
+     link_status = []
+     req_code = []
+     websites = []
+     # Request each professional link to see whether it is working
+     for i in range(len(prof_links["Professional Links"])):
+         x = requests.get(prof_links["Professional Links"][i])
+         link_status.append(x.reason)
+         req_code.append(x.status_code)
+
+     if department == 'Data Science':
+
+         for i in range(len(prof_links["Professional Links"])):
+             if "github.com" in prof_links["Professional Links"][i]:
+                 web_name = 'Github'
+                 websites.append(web_name)
+             elif 'github.io' in prof_links["Professional Links"][i]:
+                 web_name = 'Portfolio'
+                 websites.append(web_name)
+             else:
+                 web_name = 'Linkedin'
+                 websites.append(web_name)
+     elif department == 'Web Developer':
+
+         for i in range(len(prof_links["Professional Links"])):
+             if "github.com" in prof_links["Professional Links"][i]:
+                 web_name = 'Github'
+                 websites.append(web_name)
+             elif 'heroku' in prof_links["Professional Links"][i]:
+                 web_name = 'Portfolio'
+                 websites.append(web_name)
+             else:
+                 web_name = 'Linkedin'
+                 websites.append(web_name)
+
+     prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3], "Return Code": req_code, "Link Status": link_status})
+     proj_stat = []
+     proj_rcode = []
+     owner_name = []
+     port_url = None  # stays None if no portfolio link is found
+     github_url = None  # stays None if no GitHub link is found
+     linkedin_url = None  # stays None if no LinkedIn link is found
+     b_status = []
+     link_owner = []
+     for i in range(len(project_match["Project Links"])):
+         x = requests.get(project_match["Project Links"][i])
+         proj_stat.append(x.reason)
+         proj_rcode.append(x.status_code)
+
+     for i in range(len(prof_links['Professional Links'])):
+         if prof_links['Websites'][i] == 'Portfolio':
+             port_url = prof_links['Professional Links'][i]
+     for i in range(len(prof_links['Professional Links'])):
+         if prof_links['Websites'][i] == 'Github':
+             github_url = prof_links['Professional Links'][i]
+     for i in range(len(prof_links['Professional Links'])):
+         if prof_links['Websites'][i] == 'Linkedin':
+             linkedin_url = prof_links['Professional Links'][i]
+
+     if department == 'Web Developer':
+         for i in range(len(prof_links['Professional Links'])):
+             if prof_links['Professional Links'][i] == port_url:
+                 reqs = requests.get(port_url)
+                 soup = BeautifulSoup(reqs.content, 'html.parser')
+                 stu_name = soup.find(class_="name").get_text()  # name displayed on the portfolio page
+                 if stu_name == grad_name:
+                     link_owner.append(grad_name)
+                 else:
+                     link_owner.append(stu_name)
+                 if stu_name in grad_name:
+                     # the portfolio name matches the name on the resume
+                     b_status.append('Verified')
+                 else:
+                     port_status = 'Check Manually'
+                     b_status.append(port_status)
+             elif prof_links['Professional Links'][i] == github_url:
+                 req = Request(port_url)  # fetch the portfolio page to cross-check the GitHub link
+                 html_page = urlopen(req)
+                 soup = BeautifulSoup(html_page, "lxml")
+                 links = []
+                 for link in soup.find_all("a"):
+                     links.append(link.get('href'))
+                 if github_url in links:
+                     link_owner.append(grad_name)
+                     github_status = 'Verified'
+                     b_status.append(github_status)
+                 else:
+                     owner = 'Unknown'
+                     link_owner.append(owner)
+                     github_status = 'Check Manually'
+                     b_status.append(github_status)
+             elif prof_links['Professional Links'][i] == linkedin_url:
+                 # the LinkedIn profile is verified by checking whether
+                 # the student's portfolio page links to it
+                 req = Request(port_url)
+                 html_page = urlopen(req)
+                 soup = BeautifulSoup(html_page, "lxml")
+                 links = []
+                 # collect every outbound link on the portfolio page
+                 for link in soup.find_all('a'):
+                     href = link.get('href')
+                     links.append(href)
+                 if linkedin_url in links:
+                     link_owner.append(grad_name)
+                     linkedin_status = 'Verified'
+                     b_status.append(linkedin_status)
+                 else:
+                     owner = 'Unknown'
+                     link_owner.append(owner)
+                     linkedin_status = 'Check Manually'
+                     b_status.append(linkedin_status)
+
+     if department == 'Data Science':
+         for i in range(len(prof_links['Professional Links'])):
+             if prof_links['Professional Links'][i] == port_url:
+                 reqs = requests.get(port_url)
+                 soup = BeautifulSoup(reqs.content, 'html.parser')
+                 for title in soup.find_all('title'):
+                     heading = title.get_text()  # page title of the portfolio
+                 if grad_name in heading:
+                     owner = grad_name
+                     link_owner.append(owner)
+                     status = 'Verified'
+                     b_status.append(status)
+                 else:
+                     owner = heading[25:]  # original heuristic: drop the first 25 characters of the title
+                     link_owner.append(owner)
+                     status = 'Check Manually'
+                     b_status.append(status)
+
+             elif prof_links['Professional Links'][i] == github_url:
+                 req = Request(port_url)  # fetch the portfolio page to cross-check the GitHub link
+                 html_page = urlopen(req)
+                 soup = BeautifulSoup(html_page, "lxml")
+                 links = []
+                 for link in soup.find_all("a"):
+                     links.append(link.get('href'))
+                 if github_url in links:
+                     link_owner.append(grad_name)
+                     github_status = 'Verified'
+                     b_status.append(github_status)
+                 else:
+                     owner = 'Unknown'
+                     link_owner.append(owner)
+                     github_status = 'Check Manually'
+                     b_status.append(github_status)
+             elif prof_links['Professional Links'][i] == linkedin_url:
+                 # the LinkedIn profile is verified by checking whether
+                 # the student's portfolio page links to it
+                 req = Request(port_url)
+                 html_page = urlopen(req)
+
+                 soup = BeautifulSoup(html_page, "lxml")
+                 links = []
+                 # collect every outbound link on the portfolio page
+                 for link in soup.find_all('a'):
+                     href = link.get('href')
+                     links.append(href)
+
+                 if linkedin_url in links:
+                     link_owner.append(grad_name)
+                     linkedin_status = 'Verified'
+                     b_status.append(linkedin_status)
+
+
+                 else:
+                     owner = 'Unknown'
+                     link_owner.append(owner)
+                     linkedin_status = 'Check Manually'
+                     b_status.append(linkedin_status)
+
+     proj_link_owner = []
+     proj_link_status = []
+     for i in range(len(project_match['Project Links'])):
+         req = Request(port_url)  # each project link is checked against the links on the portfolio page
+         html_page = urlopen(req)
+         soup = BeautifulSoup(html_page, "lxml")
+         links = []
+         for link in soup.find_all("a"):
+             links.append(link.get('href'))
+         if project_match['Project Links'][i] in links:
+             proj_link_owner.append(grad_name)
+             proj_link_status.append('Verified')
+         else:
+             owner = 'Unknown'
+             proj_link_owner.append(owner)
+             # the project link does not appear on the portfolio page
+             proj_link_status.append('Check Manually')
+
+     prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3], "Return Code": req_code, "Link Status": link_status, "Link Owner": link_owner, 'Verification Status': b_status})
+     project_match = pd.DataFrame(
+         {"Project Links": match[3:], "Request Code": proj_rcode, "Status": proj_stat, 'Project Owner': proj_link_owner,
+          'Verification Status': proj_link_status})
+     return grad_name, department, prof_links, project_match
+
+
+ # Gradio interface
+ iface = gr.Interface(fn=class_resume, inputs=['file'], outputs=['text', 'text', "dataframe", "dataframe"],
+                      title="Resume Screening")
+ iface.launch()
+ iface.launch()