Spaces
suvo-gh committed
Commit 1366553 • Parent(s): 981a21b
app added
Browse files
- .DS_Store +0 -0
- field.py +9 -0
- hi.txt +0 -0
- requirement.txt +1 -0
- splitter.py +9 -0
- student_name.py +9 -0
- test.py +175 -0
.DS_Store
ADDED
Binary file (6.15 kB)
field.py
ADDED
@@ -0,0 +1,9 @@
+def branch(text_list):
+    department = None  # fallback when no keyword is found
+    for word in text_list:
+        # check each word against the department keywords
+        if word in ('Pandas', 'Neural', 'Sklearn', 'Matplotlib', 'Tensorflow'):
+            department = 'Data Science'
+        elif word == 'Django':
+            department = 'Web Developer'
+    return department
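For reference, a minimal sanity check of branch(); the sample token lists below are hypothetical, not part of the commit:

tokens = ['John', 'Doe', 'Data', 'Science', 'Pandas', 'Sklearn']  # hypothetical resume tokens
print(branch(tokens))                     # -> 'Data Science'
print(branch(['Jane', 'Roe', 'Django']))  # -> 'Web Developer'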
hi.txt
DELETED
File without changes
requirement.txt
ADDED
@@ -0,0 +1 @@
+docx2txt==0.8
splitter.py
ADDED
@@ -0,0 +1,9 @@
+import re
+import string
+import docx2txt
+
+def split_string(path):
+    doc = docx2txt.process(path)  # raw text of the .docx resume
+    # strip punctuation, then split into a list of words
+    res = re.sub('[' + re.escape(string.punctuation) + ']', '', doc).split()
+    return res
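A quick usage sketch, assuming a hypothetical resume.docx on disk and the docx2txt pin from requirement.txt:

words = split_string('resume.docx')  # 'resume.docx' is a hypothetical input file
print(words[:4])  # e.g. ['John', 'Doe', 'Data', 'Science'], punctuation already stripped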
student_name.py
ADDED
@@ -0,0 +1,9 @@
+def student(text_list):
+    # If the token right after a two-word name is a department keyword
+    # ('Data'/'Web' in any casing), the name is the first two words;
+    # otherwise assume a three-word name.
+    if len(text_list) > 2 and text_list[2] in ('Data', 'DATA', 'data', 'Web', 'WEB', 'web'):
+        student_name = text_list[0] + ' ' + text_list[1]
+    else:
+        student_name = text_list[0] + ' ' + text_list[1] + ' ' + text_list[2]
+    return student_name
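A short sketch of the two branches; both token lists are hypothetical:

print(student(['John', 'Doe', 'Data', 'Science']))   # keyword at index 2 -> 'John Doe'
print(student(['John', 'Middle', 'Doe', 'Django']))  # no keyword -> 'John Middle Doe'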
test.py
ADDED
@@ -0,0 +1,175 @@
+import re
+
+import docx2txt
+import gradio as gr
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from urllib.request import Request, urlopen
+
+import field
+import splitter as sp
+import student_name as sc
+
+
+def class_resume(path):
+    text_list = sp.split_string(path)  # split the resume text into words
+    grad_name = sc.student(text_list)  # find the student name
+    department = field.branch(text_list)  # whether the student is from data science or web development
+
+    doc = docx2txt.process(path)  # text data of the resume
+    match = re.findall(r"(?P<url>https?://[^\s]+)", doc)  # find all URLs in the resume
+    # the first three URLs are taken as professional links (GitHub, LinkedIn, portfolio)
+    prof_links = pd.DataFrame({"Professional Links": match[0:3]})
+    # the remaining URLs are taken as project links (websites and GitHub repositories)
+    project_match = pd.DataFrame({"Project Links": match[3:]})
+    link_status = []
+    req_code = []
+    websites = []
+    # request each professional link to see whether it is working
+    for i in range(len(prof_links["Professional Links"])):
+        x = requests.get(prof_links["Professional Links"][i])
+        link_status.append(x.reason)
+        req_code.append(x.status_code)
+
+    # label each professional link; portfolios live on github.io for
+    # data-science students and on Heroku for web-development students
+    if department == 'Data Science':
+        for i in range(len(prof_links["Professional Links"])):
+            if "github.com" in prof_links["Professional Links"][i]:
+                websites.append('Github')
+            elif 'github.io' in prof_links["Professional Links"][i]:
+                websites.append('Portfolio')
+            else:
+                websites.append('Linkedin')
+    elif department == 'Web Developer':
+        for i in range(len(prof_links["Professional Links"])):
+            if "github.com" in prof_links["Professional Links"][i]:
+                websites.append('Github')
+            elif 'heroku' in prof_links["Professional Links"][i]:
+                websites.append('Portfolio')
+            else:
+                websites.append('Linkedin')
+
+    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
+                               "Return Code": req_code, "Link Status": link_status})
+    proj_stat = []
+    proj_rcode = []
+    port_url = github_url = linkedin_url = None  # not yet known
+    b_status = []
+    link_owner = []
+    # request each project link to see whether it is working
+    for i in range(len(project_match["Project Links"])):
+        x = requests.get(project_match["Project Links"][i])
+        proj_stat.append(x.reason)
+        proj_rcode.append(x.status_code)
+
+    for i in range(len(prof_links['Professional Links'])):
+        if prof_links['Websites'][i] == 'Portfolio':
+            port_url = prof_links['Professional Links'][i]
+        elif prof_links['Websites'][i] == 'Github':
+            github_url = prof_links['Professional Links'][i]
+        elif prof_links['Websites'][i] == 'Linkedin':
+            linkedin_url = prof_links['Professional Links'][i]
+
+    if department == 'Web Developer':
+        for i in range(len(prof_links['Professional Links'])):
+            if prof_links['Professional Links'][i] == port_url:
+                # the portfolio page is expected to carry the student's name
+                reqs = requests.get(port_url)
+                soup = BeautifulSoup(reqs.content, 'html.parser')
+                stu_name = soup.find(class_="name").get_text()
+                link_owner.append(grad_name if stu_name == grad_name else stu_name)
+                if stu_name in grad_name:
+                    b_status.append('Verified')
+                else:
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == github_url:
+                # ownership check: the portfolio should link to this GitHub profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all("a")]
+                if github_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == linkedin_url:
+                # ownership check: the portfolio should link to this LinkedIn profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all('a')]
+                if linkedin_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+
+    if department == 'Data Science':
+        for i in range(len(prof_links['Professional Links'])):
+            if prof_links['Professional Links'][i] == port_url:
+                # the portfolio <title> is expected to contain the student's name
+                reqs = requests.get(port_url)
+                soup = BeautifulSoup(reqs.content, 'html.parser')
+                heading = ''
+                for title in soup.find_all('title'):
+                    heading = title.get_text()
+                if grad_name in heading:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append(heading[25:])  # rest of the title as the owner
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == github_url:
+                # ownership check: the portfolio should link to this GitHub profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all("a")]
+                if github_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+            elif prof_links['Professional Links'][i] == linkedin_url:
+                # ownership check: the portfolio should link to this LinkedIn profile
+                html_page = urlopen(Request(port_url))
+                soup = BeautifulSoup(html_page, "lxml")
+                links = [link.get('href') for link in soup.find_all('a')]
+                if linkedin_url in links:
+                    link_owner.append(grad_name)
+                    b_status.append('Verified')
+                else:
+                    link_owner.append('Unknown')
+                    b_status.append('Check Manually')
+
+    # verify project links the same way: each should appear on the portfolio page
+    proj_link_owner = []
+    proj_link_status = []
+    for i in range(len(project_match['Project Links'])):
+        html_page = urlopen(Request(port_url))
+        soup = BeautifulSoup(html_page, "lxml")
+        links = [link.get('href') for link in soup.find_all("a")]
+        if project_match['Project Links'][i] in links:
+            proj_link_owner.append(grad_name)
+            proj_link_status.append('Verified')
+        else:
+            proj_link_owner.append('Unknown')
+            proj_link_status.append('Check Manually')
+
+    prof_links = pd.DataFrame({'Websites': websites, "Professional Links": match[0:3],
+                               "Return Code": req_code, "Link Status": link_status,
+                               "Link Owner": link_owner, 'Verification Status': b_status})
+    project_match = pd.DataFrame({"Project Links": match[3:], "Request Code": proj_rcode,
+                                  "Status": proj_stat, 'Project Owner': proj_link_owner,
+                                  'Verification Status': proj_link_status})
+    return grad_name, department, prof_links, project_match
+
+
+# Gradio interface: upload a resume, get back name, department, and the two link tables
+iface = gr.Interface(fn=class_resume, inputs=['file'],
+                     outputs=['text', 'text', "dataframe", "dataframe"],
+                     title="Resume Screening")
+iface.launch()
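Note that class_resume relies on the first three extracted URLs being the professional links, so link order in the resume matters. A small sketch of the URL regex it uses (the sample text is hypothetical):

import re

sample = "GitHub: https://github.com/jdoe Portfolio: https://jdoe.github.io"  # hypothetical resume text
print(re.findall(r"(?P<url>https?://[^\s]+)", sample))
# -> ['https://github.com/jdoe', 'https://jdoe.github.io']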