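"""Resume screening tool.

Extracts the student's name, department, and links from a .docx resume,
checks that each link is reachable, and verifies link ownership against
the student's portfolio page. Relies on the local helper modules
splitter, student_name, and field.
"""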
import re

import docx2txt
import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen

import splitter as sp  # local helper: splits the resume text into words
import student_name as sc  # local helper: extracts the student's name
import field  # local helper: classifies the resume by department


def class_resume(path):
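    """Screen a .docx resume and verify the links it contains.

    Returns the student's name, the department ('Data Science' or
    'Web Developer'), a DataFrame of professional links (GitHub, LinkedIn,
    portfolio) with HTTP status and verification result, and a DataFrame
    of project links verified against the portfolio page.
    """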
    text_list = sp.split_string(path)  # split the resume text into words
    grad_name = sc.student(text_list)  # find the student's name
    department = field.branch(text_list)  # find whether the student is from Data Science or Web Development

    doc = docx2txt.process(path)  # full text of the resume
    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all URLs in the resume
    prof_links = pd.DataFrame(
        {"Professional Links": match[0:3]})  # the first three links are assumed professional: GitHub, LinkedIn, portfolio
    project_match = pd.DataFrame(
        {"Project Links": match[3:]})  # the remaining links are project links (websites and GitHub repositories)
    link_status = []
    req_code = []
    websites = []
    # Check whether each professional link is reachable and record its HTTP status
    for i in range(len(prof_links["Professional Links"])):
        x = requests.get(prof_links["Professional Links"][i])
        link_status.append(x.reason)
        req_code.append(x.status_code)

    # Label each professional link by site: GitHub profiles contain github.com;
    # portfolios are hosted on github.io (Data Science) or Heroku (Web Developer);
    # anything else is assumed to be LinkedIn
    if department == 'Data Science':
        portfolio_host = 'github.io'
    else:  # 'Web Developer'
        portfolio_host = 'heroku'
    for i in range(len(prof_links["Professional Links"])):
        if "github.com" in prof_links["Professional Links"][i]:
            websites.append('Github')
        elif portfolio_host in prof_links["Professional Links"][i]:
            websites.append('Portfolio')
        else:
            websites.append('Linkedin')

    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
                               "Return Code": req_code, "Link Status": link_status})
    proj_stat = []
    proj_rcode = []
    b_status = []
    link_owner = []
    port_url = github_url = linkedin_url = None  # filled in below from the labelled links
    # Check whether each project link is reachable and record its HTTP status
    for i in range(len(project_match["Project Links"])):
        x = requests.get(project_match["Project Links"][i])
        proj_stat.append(x.reason)
        proj_rcode.append(x.status_code)

    for i in range(len(prof_links['Professional Links'])):
        if prof_links['Websites'][i] == 'Portfolio':
            port_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Github':
            github_url = prof_links['Professional Links'][i]
        elif prof_links['Websites'][i] == 'Linkedin':
            linkedin_url = prof_links['Professional Links'][i]

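    # Verification strategy: the portfolio page is the trust anchor. The name on
    # the portfolio is matched against the resume's name, and the GitHub and
    # LinkedIn URLs are verified by checking that the portfolio page links to them.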
    if department == 'Web Developer':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                # The portfolio owner's name is shown in an element with class "name"
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                stu_name = soup.find(class_="name").get_text()
                link_owner.append(stu_name)
                if stu_name in grad_name:
                    b_status.append('Verified')
                else:
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                html_page = urlopen(Request(port_url))
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                html_page = urlopen(Request(port_url))
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')

    if department == 'Data Science':
        for i in range(len(prof_links['Professional Links'])):
            if prof_links['Professional Links'][i] == port_url:
                # The portfolio owner's name appears in the page title
                reqs = requests.get(port_url)
                soup = BeautifulSoup(reqs.content, 'html.parser')
                heading = ''
                for title in soup.find_all('title'):
                    heading = title.get_text()
                if grad_name in heading:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    # assumes a fixed 25-character title prefix before the owner's name
                    link_owner.append(heading[25:])
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == github_url:
                html_page = urlopen(Request(port_url))
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all("a")]
                if github_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')
            elif prof_links['Professional Links'][i] == linkedin_url:
                html_page = urlopen(Request(port_url))
                soup = BeautifulSoup(html_page, "lxml")
                links = [link.get('href') for link in soup.find_all('a')]
                if linkedin_url in links:
                    link_owner.append(grad_name)
                    b_status.append('Verified')
                else:
                    link_owner.append('Unknown')
                    b_status.append('Check Manually')

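    # Project links are verified the same way: a project is attributed to the
    # student only if the portfolio page links to it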
    proj_link_owner = []
    proj_link_status = []
    # Fetch the portfolio once and collect its outgoing links
    html_page = urlopen(Request(port_url))
    soup = BeautifulSoup(html_page, "lxml")
    links = [link.get('href') for link in soup.find_all("a")]
    for i in range(len(project_match['Project Links'])):
        if project_match['Project Links'][i] in links:
            proj_link_owner.append(grad_name)
            proj_link_status.append('Verified')
        else:
            proj_link_owner.append('Unknown')
            proj_link_status.append('Check Manually')

    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
                               "Return Code": req_code, "Link Status": link_status,
                               "Link Owner": link_owner, 'Verification Status': b_status})
    project_match = pd.DataFrame({"Project Links": match[3:], "Request Code": proj_rcode,
                                  "Status": proj_stat, 'Project Owner': proj_link_owner,
                                  'Verification Status': proj_link_status})
    return grad_name, department, prof_links, project_match
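
# Example of a direct call (sketch), assuming "resume.docx" is a local .docx resume:
#   name, dept, links_df, projects_df = class_resume("resume.docx")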


# Gradio interface: upload a resume file and get the name, department, and the two link reports
iface = gr.Interface(fn=class_resume, inputs=['file'], outputs=['text', 'text', "dataframe", "dataframe"],
                     title="Resume Screening")

if __name__ == "__main__":
    iface.launch()