DilshanKavinda committed on
Commit
5886c55
1 Parent(s): 00033f7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +204 -0
app.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pdfplumber
3
+ import pandas as pd
4
+
5
+ from sentence_transformers import SentenceTransformer, util
6
# Sentence-embedding model used by main() to encode the job description and
# the CV texts; it is instantiated whenever this module is executed.
model = SentenceTransformer("all-MiniLM-L6-v2")
7
+
8
def extract_information_from_cv(pdf_content):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_content: Whatever ``pdfplumber.open`` accepts (``main`` passes
            the uploaded file object it gets from Streamlit).

    Returns:
        The text of all pages joined in page order with no separator.
        A page that yields no text contributes an empty string.
    """
    with pdfplumber.open(pdf_content) as pdf:
        # ``extract_text`` can return None for a page without extractable
        # text (seen on older pdfplumber releases, e.g. image-only pages);
        # ``or ""`` keeps such a page from raising a TypeError on concat.
        return "".join(page.extract_text() or "" for page in pdf.pages)
15
+
16
def extract_title(text):
    """Return the text between the ``Title:`` and ``Name:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Title:`` marker and the first
        ``Name:`` marker that follows it, or ``None`` when either marker is
        missing.
    """
    start_marker = "Title:"
    end_marker = "Name:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
22
+
23
+
24
def extract_name(text):
    """Return the text between the ``Name:`` and ``Email:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Name:`` marker and the first
        ``Email:`` marker that follows it, or ``None`` when either marker is
        missing.
    """
    start_marker = "Name:"
    end_marker = "Email:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
30
+
31
+
32
def extract_Email(text):
    """Return the text between the ``Email:`` and ``Phone:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Email:`` marker and the first
        ``Phone:`` marker that follows it, or ``None`` when either marker is
        missing.
    """
    start_marker = "Email:"
    end_marker = "Phone:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
38
+
39
+
40
def extract_Phone(text):
    """Return the text between the ``Phone:`` and ``LinkedIn:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Phone:`` marker and the first
        ``LinkedIn:`` marker that follows it, or ``None`` when either marker
        is missing.
    """
    start_marker = "Phone:"
    end_marker = "LinkedIn:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
46
+
47
+
48
def extract_LinkedIn(text):
    """Return the text between the ``LinkedIn:`` and ``GitHub:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``LinkedIn:`` marker and the
        first ``GitHub:`` marker that follows it, or ``None`` when either
        marker is missing.
    """
    start_marker = "LinkedIn:"
    end_marker = "GitHub:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
54
+
55
+
56
def extract_Github(text):
    """Return the text between the ``GitHub:`` and ``Summary:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``GitHub:`` marker and the first
        ``Summary:`` marker that follows it, or ``None`` when either marker
        is missing.
    """
    start_marker = "GitHub:"
    end_marker = "Summary:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
62
+
63
+
64
def extract_summary(text):
    """Return the text between the ``Summary:`` and ``Education:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Summary:`` marker and the
        first ``Education:`` marker that follows it, or ``None`` when either
        marker is missing.
    """
    start_marker = "Summary:"
    end_marker = "Education:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
69
+
70
+
71
def extract_education(text):
    """Return the text between the ``Education:`` and ``Internship:`` markers.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Education:`` marker and the
        first ``Internship:`` marker that follows it, or ``None`` when either
        marker is missing.
    """
    start_marker = "Education:"
    end_marker = "Internship:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
76
+
77
+
78
def extract_Internship(text):
    """Return the text between ``Internship:`` and ``Professional Experience:``.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Internship:`` marker and the
        first ``Professional Experience:`` marker that follows it, or
        ``None`` when either marker is missing.
    """
    start_marker = "Internship:"
    end_marker = "Professional Experience:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
83
+
84
def extract_experience(text):
    """Return the text between ``Professional Experience:`` and ``Projects:``.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Professional Experience:``
        marker and the first ``Projects:`` marker that follows it, or
        ``None`` when either marker is missing.
    """
    start_marker = "Professional Experience:"
    end_marker = "Projects:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
89
+
90
+
91
def extract_projects(text):
    """Return the text between ``Projects:`` and ``Awards and Certifications:``.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Projects:`` marker and the
        first ``Awards and Certifications:`` marker that follows it, or
        ``None`` when either marker is missing.
    """
    start_marker = "Projects:"
    end_marker = "Awards and Certifications:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
96
+
97
+
98
def extract_certifications(text):
    """Return the text between ``Awards and Certifications:`` and ``Skills:``.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped text between the first ``Awards and Certifications:``
        marker and the first ``Skills:`` marker that follows it, or ``None``
        when either marker is missing.
    """
    start_marker = "Awards and Certifications:"
    end_marker = "Skills:"

    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return None
    content_start = marker_pos + len(start_marker)

    # Search for the closing marker only after the opening one, so a stray
    # earlier occurrence cannot produce an empty or truncated slice.
    content_end = text.find(end_marker, content_start)
    if content_end == -1:
        return None
    return text[content_start:content_end].strip()
103
+
104
+
105
def extract_skills(text):
    """Return everything after the first ``Skills:`` marker.

    Args:
        text: Full text of a CV.

    Returns:
        The stripped remainder of ``text`` following the first ``Skills:``
        marker, or ``None`` when the marker does not occur.
    """
    _, marker, remainder = text.partition("Skills:")
    # ``partition`` leaves the separator slot empty when nothing matched.
    if not marker:
        return None
    return remainder.strip()
109
+
110
+
111
def main():
    """Render the CV shortlisting page and rank the uploaded CVs.

    Reads a job description and a set of PDF CVs from the Streamlit widgets.
    Once the "Extract Data" button is pressed it extracts the labelled
    sections of every CV, scores each CV's full text against the job
    description with the cosine similarity of their sentence embeddings,
    and writes the table sorted by score followed by the full text of the
    top ``no_of_candidates`` CVs.
    """
    st.title("CV Shortlisting App")
    job_description = st.text_area('Job description')
    uploaded_files = st.file_uploader(
        "Choose multiple CV files", type="pdf", accept_multiple_files=True
    )
    # Treat "nothing uploaded" uniformly as an empty list so len() is safe.
    uploaded_files = uploaded_files or []

    options = list(range(1, len(uploaded_files) + 1))
    no_of_candidates = st.selectbox('No of candidates need:', options)

    # The button is only offered once a candidate count can be chosen; the
    # name is bound unconditionally so the check below cannot hit a NameError.
    extract_button = st.button("Extract Data") if no_of_candidates else False
    if not (uploaded_files and extract_button):
        return

    # Output column -> extractor, in the order the columns are displayed.
    extractors = {
        "Title": extract_title,
        "Name": extract_name,
        "Email": extract_Email,
        "Phone": extract_Phone,
        "LinkedIn": extract_LinkedIn,
        "Github": extract_Github,
        "Summary": extract_summary,
        "Education": extract_education,
        "Internships": extract_Internship,
        "Professional Experience": extract_experience,
        "Projects": extract_projects,
        "Awards and Certifications": extract_certifications,
        "Skills": extract_skills,
    }

    cv_data = []
    extracted_data = []
    for uploaded_file in uploaded_files:
        cv_text = extract_information_from_cv(uploaded_file)
        cv_data.append(cv_text)
        # Store plain values (not one-element lists) so the table shows the
        # extracted text itself.
        extracted_data.append(
            {column: extract(cv_text) for column, extract in extractors.items()}
        )

    # Embed the job description and every CV, then compare them.
    job_embedding = model.encode(job_description, convert_to_tensor=True)
    cv_embeddings = model.encode(cv_data, convert_to_tensor=True)
    cosine_scores = util.cos_sim(job_embedding, cv_embeddings)

    # Row 0 holds the job description's similarity to each CV; convert it to
    # plain floats so pandas can sort and Streamlit can render the column.
    scores = cosine_scores[0].tolist()

    st.write("### Extracted Data:")
    final_df = pd.DataFrame(extracted_data)
    final_df['Score'] = scores
    df_sorted = final_df.sort_values(by='Score', ascending=False)

    # The frame's index labels are the positions in ``cv_data``, so the top
    # rows map straight back to their CV text. This avoids matching on the
    # e-mail string, which raised TypeError when no e-mail was extracted and
    # matched every CV when the e-mail was empty.
    top_cvs = df_sorted.head(no_of_candidates)
    top_cv_list = [cv_data[position] for position in top_cvs.index]

    st.write(df_sorted)

    st.subheader(f"Top {no_of_candidates} Candidates's cv")
    st.write(top_cv_list)
203
# Launch the app only when this file is run as the main module.
if __name__ == "__main__":
    main()