Spaces:
Sleeping
Sleeping
DilshanKavinda
committed on
Commit
•
5886c55
1
Parent(s):
00033f7
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pdfplumber
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
from sentence_transformers import SentenceTransformer, util
|
6 |
+
# Sentence-embedding model used to score CV text against the job description.
# Created at module level so all helper functions and main() share one instance.
model = SentenceTransformer('all-MiniLM-L6-v2')
7 |
+
|
8 |
+
def extract_information_from_cv(pdf_content):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_content: A path or file-like object accepted by ``pdfplumber.open``
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        str: All extractable page text joined together; pages with no
        extractable text contribute nothing.
    """
    with pdfplumber.open(pdf_content) as pdf:
        # extract_text() returns None for pages without extractable text
        # (e.g. scanned images); coalesce to "" so concatenation cannot
        # raise a TypeError.
        return "".join(page.extract_text() or "" for page in pdf.pages)
15 |
+
|
16 |
+
def _extract_section(text, start_marker, end_marker=None):
    """Return the stripped text between two section markers.

    Args:
        text: Full CV text to search.
        start_marker: Label that opens the section (e.g. ``"Title:"``).
        end_marker: Label that closes the section, or None to read to the
            end of the text.

    Returns:
        str | None: The section content with surrounding whitespace removed,
        or None when a required marker is absent. (As in the original code,
        if the end marker occurs before the start marker the slice is empty
        and ``""`` is returned.)
    """
    start = text.find(start_marker)
    if start == -1:
        return None
    section_start = start + len(start_marker)
    if end_marker is None:
        return text[section_start:].strip()
    end = text.find(end_marker)
    if end == -1:
        return None
    return text[section_start:end].strip()


def extract_title(text):
    """Text between "Title:" and "Name:", or None if a marker is missing."""
    return _extract_section(text, "Title:", "Name:")


def extract_name(text):
    """Text between "Name:" and "Email:", or None if a marker is missing."""
    return _extract_section(text, "Name:", "Email:")


def extract_Email(text):
    """Text between "Email:" and "Phone:", or None if a marker is missing."""
    return _extract_section(text, "Email:", "Phone:")


def extract_Phone(text):
    """Text between "Phone:" and "LinkedIn:", or None if a marker is missing."""
    return _extract_section(text, "Phone:", "LinkedIn:")


def extract_LinkedIn(text):
    """Text between "LinkedIn:" and "GitHub:", or None if a marker is missing."""
    return _extract_section(text, "LinkedIn:", "GitHub:")


def extract_Github(text):
    """Text between "GitHub:" and "Summary:", or None if a marker is missing."""
    return _extract_section(text, "GitHub:", "Summary:")


def extract_summary(text):
    """Text between "Summary:" and "Education:", or None if a marker is missing."""
    return _extract_section(text, "Summary:", "Education:")


def extract_education(text):
    """Text between "Education:" and "Internship:", or None if a marker is missing."""
    return _extract_section(text, "Education:", "Internship:")


def extract_Internship(text):
    """Text between "Internship:" and "Professional Experience:", or None."""
    return _extract_section(text, "Internship:", "Professional Experience:")


def extract_experience(text):
    """Text between "Professional Experience:" and "Projects:", or None."""
    return _extract_section(text, "Professional Experience:", "Projects:")


def extract_projects(text):
    """Text between "Projects:" and "Awards and Certifications:", or None."""
    return _extract_section(text, "Projects:", "Awards and Certifications:")


def extract_certifications(text):
    """Text between "Awards and Certifications:" and "Skills:", or None."""
    return _extract_section(text, "Awards and Certifications:", "Skills:")


def extract_skills(text):
    """Text after "Skills:" to the end of the CV, or None if missing."""
    return _extract_section(text, "Skills:")
109 |
+
|
110 |
+
|
111 |
+
def main():
    """Streamlit entry point: rank uploaded CV PDFs against a job description.

    Extracts labelled sections from each uploaded CV, embeds the full CV
    text and the job description with the shared sentence-transformer
    model, and displays candidates sorted by cosine similarity, plus the
    raw CV text of the top-N candidates.
    """
    st.title("CV Shortlisting App")
    job_description = st.text_area('Job description')
    uploaded_files = st.file_uploader("Choose multiple CV files", type="pdf", accept_multiple_files=True)

    # One 1-based option per uploaded CV; guard the empty-upload case so the
    # selectbox is not built from a None/empty upload list.
    options = [i + 1 for i in range(len(uploaded_files))] if uploaded_files else []
    no_of_candidates = st.selectbox('No of candidates need:', options) if options else None

    # Always bind the button flag so the combined condition below can never
    # hit an unbound name.
    extract_button = st.button("Extract Data") if no_of_candidates else False

    extracted_data = []
    cv_data = []

    if uploaded_files and extract_button:
        for uploaded_file in uploaded_files:
            cv_text = extract_information_from_cv(uploaded_file)
            cv_data.append(cv_text)

            # Store plain scalar values (not one-element lists) so the
            # DataFrame below gets clean string columns and the email
            # matching further down can compare strings directly.
            extracted_data.append({
                "Title": extract_title(cv_text),
                "Name": extract_name(cv_text),
                "Email": extract_Email(cv_text),
                "Phone": extract_Phone(cv_text),
                "LinkedIn": extract_LinkedIn(cv_text),
                "Github": extract_Github(cv_text),
                "Summary": extract_summary(cv_text),
                "Education": extract_education(cv_text),
                "Internships": extract_Internship(cv_text),
                "Professional Experience": extract_experience(cv_text),
                "Projects": extract_projects(cv_text),
                "Awards and Certifications": extract_certifications(cv_text),
                "Skills": extract_skills(cv_text),
            })

        # Compute embeddings for the job description and every CV, then
        # score each CV by cosine similarity to the description.
        job_embedding = model.encode(job_description, convert_to_tensor=True)
        cv_embeddings = model.encode(cv_data, convert_to_tensor=True)
        cosine_scores = util.cos_sim(job_embedding, cv_embeddings)

        # .item() turns each 0-d tensor into a plain float so the Score
        # column sorts numerically instead of holding tensor objects.
        scores = [cosine_scores[0][i].item() for i in range(len(cv_data))]

        st.write("### Extracted Data:")
        final_df = pd.DataFrame(extracted_data)
        final_df['Score'] = scores
        df_sorted = final_df.sort_values(by='Score', ascending=False)

        # Recover the full CV text of the top-N candidates by matching the
        # extracted email back into the raw CV text.
        top_cvs = df_sorted.head(no_of_candidates)
        top_cv_list = []
        for email in top_cvs['Email'].values:
            for cv in cv_data:
                if email and email in cv:
                    top_cv_list.append(cv)

        st.write(df_sorted)

        st.subheader(f"Top {no_of_candidates} Candidates's cv")
        st.write(top_cv_list)


if __name__ == "__main__":
    main()