Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,23 +1,17 @@
|
|
1 |
-
import os
|
2 |
-
import pandas as pd
|
3 |
-
import google.generativeai as genai
|
4 |
-
import PyPDF2 as pdf
|
5 |
-
import io
|
6 |
import re
|
7 |
-
import
|
8 |
-
|
|
|
9 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
10 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
11 |
|
12 |
-
#
|
13 |
-
api_key =
|
14 |
-
if not api_key:
|
15 |
-
raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")
|
16 |
-
|
17 |
-
# Initialize the generative AI model
|
18 |
genai.configure(api_key=api_key)
|
19 |
|
20 |
-
#
|
21 |
skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
|
22 |
education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
|
23 |
|
@@ -30,41 +24,31 @@ def input_pdf_text(uploaded_file):
|
|
30 |
text += page.extract_text()
|
31 |
return text
|
32 |
|
33 |
-
# Extract
|
34 |
-
def extract_name_from_model_response(response_text):
|
35 |
-
match = re.search(r"Candidate Name:\s*(.*)", response_text)
|
36 |
-
if match:
|
37 |
-
return match.group(1)
|
38 |
-
return "Not Available"
|
39 |
-
|
40 |
-
# Extract email and phone numbers using regex
|
41 |
def extract_contact_info(resume_text):
|
42 |
email_match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", resume_text)
|
|
|
43 |
email = email_match.group(0) if email_match else "Not Available"
|
|
|
|
|
44 |
|
45 |
-
|
46 |
-
contact = contact_match.group(0) if contact_match else "Not Available"
|
47 |
-
|
48 |
-
return email, contact
|
49 |
-
|
50 |
-
# Extract skills using NER model
|
51 |
def extract_skills(resume_text):
|
52 |
ner_results = skill_extractor(resume_text)
|
53 |
skills = [entity['word'] for entity in ner_results if entity['entity_group'] == 'SKILL']
|
54 |
return ", ".join(skills) if skills else "Not Available"
|
55 |
|
56 |
-
# Extract education
|
57 |
def extract_education(resume_text):
|
58 |
ner_results = education_extractor(resume_text)
|
59 |
education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
|
60 |
|
61 |
-
# If no direct education found,
|
62 |
if education_entities:
|
63 |
return ", ".join(education_entities)
|
64 |
else:
|
65 |
-
# Using regex to capture education background
|
66 |
edu_patterns = [
|
67 |
-
r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech
|
68 |
r"(University of [A-Za-z]+.*)"
|
69 |
]
|
70 |
education = []
|
@@ -74,141 +58,53 @@ def extract_education(resume_text):
|
|
74 |
|
75 |
return ", ".join(education) if education else "Not Available"
|
76 |
|
77 |
-
#
|
78 |
-
def extract_experience_years(text):
|
79 |
-
years = 0
|
80 |
-
patterns = [
|
81 |
-
r"(\d{4})\s?[-to]+\s?(\d{4})", # From year to year
|
82 |
-
r"(\d+) years", # Exact mention of years
|
83 |
-
r"since (\d{4})", # Mentions "since"
|
84 |
-
r"(\d+)\s?[\-–]\s?(\d+)", # Handles year ranges with hyphens (e.g., 2015-2020)
|
85 |
-
r"(\d+)\s?[\–]\s?present", # Present with range (e.g., 2019–present)
|
86 |
-
]
|
87 |
-
|
88 |
-
for pattern in patterns:
|
89 |
-
matches = re.findall(pattern, text)
|
90 |
-
for match in matches:
|
91 |
-
if len(match) == 2:
|
92 |
-
start_year = int(match[0])
|
93 |
-
end_year = int(match[1])
|
94 |
-
years += end_year - start_year
|
95 |
-
elif len(match) == 1:
|
96 |
-
years += int(match[0])
|
97 |
-
|
98 |
-
return years
|
99 |
-
|
100 |
-
# Calculate the match percentage using TF-IDF and cosine similarity
|
101 |
def calculate_match_percentage(resume_text, job_description):
|
102 |
-
# Combine resume and job description into a list
|
103 |
documents = [resume_text, job_description]
|
104 |
-
|
105 |
-
# Use TF-IDF to convert the documents into vectors
|
106 |
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
107 |
tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
|
108 |
-
|
109 |
-
# Compute cosine similarity between resume and job description
|
110 |
cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
|
|
|
111 |
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
# Generate the detailed analysis from the Gemini model
|
117 |
-
def get_gemini_response(input_text, job_description):
|
118 |
-
prompt = f"""
|
119 |
-
Act as an Applicant Tracking System. Analyze the resume with respect to the job description.
|
120 |
-
Candidate Details: {input_text}
|
121 |
-
Job Description: {job_description}
|
122 |
-
Please extract the following:
|
123 |
-
1. Candidate Name
|
124 |
-
2. Relevant Skills
|
125 |
-
3. Educational Background
|
126 |
-
4. Direct Team Leadership Experience (in years)
|
127 |
-
5. Direct Management Experience (in years)
|
128 |
-
6. Match percentage with the job description
|
129 |
-
7. Provide a resume summary in 5 bullet points highlighting the key details about the candidate
|
130 |
-
"""
|
131 |
model = genai.GenerativeModel('gemini-1.5-flash')
|
132 |
response = model.generate_content(prompt)
|
133 |
return response.text.strip()
|
134 |
|
135 |
-
#
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
#
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
data['Direct_Team_Leadership_Experience_Years'] = team_leadership_years if team_leadership_years > 0 else "Not Available"
|
171 |
-
data['Direct_Management_Experience_Years'] = management_years if management_years > 0 else "Not Available"
|
172 |
-
|
173 |
-
# Extract skills using the NER model
|
174 |
-
relevant_skills = extract_skills(resume_text)
|
175 |
-
data['Relevant_Skills_and_Qualifications'] = relevant_skills if relevant_skills != "Not Available" else "Not Available"
|
176 |
-
|
177 |
-
# Extract education using the NER model or regex
|
178 |
-
educational_background = extract_education(resume_text)
|
179 |
-
data['Educational_Background'] = educational_background if educational_background != "Not Available" else "Not Available"
|
180 |
-
|
181 |
-
# Calculate match percentage dynamically
|
182 |
-
match_percentage = calculate_match_percentage(resume_text, job_description)
|
183 |
-
data['Match_Percentage'] = match_percentage
|
184 |
-
|
185 |
-
# Calculate Job Description Match Score dynamically (based on match percentage)
|
186 |
-
if match_percentage >= 80:
|
187 |
-
job_description_match_score = "High"
|
188 |
-
elif match_percentage >= 60:
|
189 |
-
job_description_match_score = "Medium"
|
190 |
-
else:
|
191 |
-
job_description_match_score = "Low"
|
192 |
-
data['Job_Description_Match_Score'] = job_description_match_score
|
193 |
-
|
194 |
-
# Extract resume summary from the model response (5 bullet points)
|
195 |
-
bullet_summary = "Resume Summary (5 Bullet Points):\n"
|
196 |
-
bullets = response_text.split("\n")[-5:] # Assuming the last 5 lines are the bullet points
|
197 |
-
for idx, bullet in enumerate(bullets):
|
198 |
-
bullet_summary += f"{idx+1}. {bullet}\n"
|
199 |
-
data['Resume_Summary'] = bullet_summary
|
200 |
-
|
201 |
-
# Prepare DataFrame for output
|
202 |
-
df = pd.DataFrame([data])
|
203 |
-
|
204 |
-
# Show the result table
|
205 |
-
st.write(df)
|
206 |
-
|
207 |
-
# Provide a download button for the CSV file
|
208 |
-
csv = df.to_csv(index=False)
|
209 |
-
st.download_button(
|
210 |
-
label="Download Analysis as CSV",
|
211 |
-
data=csv,
|
212 |
-
file_name="resume_analysis.csv",
|
213 |
-
mime="text/csv"
|
214 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import io
import os
import re

import google.generativeai as genai
import pandas as pd
import PyPDF2 as pdf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
|
9 |
|
10 |
+
# Initialize Google Gemini AI API for summarization.
# The key is read from the environment (e.g. a Hugging Face Space secret)
# rather than being hard-coded in source, which would leak the credential
# and ship a non-working placeholder.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("API key not found. Please set GOOGLE_API_KEY in your Hugging Face Space secrets.")
genai.configure(api_key=api_key)
|
13 |
|
14 |
+
# Set up NER pipelines for skill and education extraction
# NOTE(review): both models are standard CoNLL-style NER checkpoints, which
# emit PER/ORG/LOC/MISC entity groups; the 'SKILL' / 'EDUCATION' groups the
# extractors below filter on may never be produced — confirm model choice.
skill_extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
education_extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
|
17 |
|
|
|
24 |
text += page.extract_text()
|
25 |
return text
|
26 |
|
27 |
+
# Extract contact information using regular expressions (email, phone)
def extract_contact_info(resume_text):
    """Return (email, phone) found in *resume_text*.

    Either element is the string "Not Available" when no match is found.
    """
    def _first_match(pattern):
        # First occurrence of *pattern* in the resume, or the sentinel.
        found = re.search(pattern, resume_text)
        return found.group(0) if found else "Not Available"

    email = _first_match(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
    phone = _first_match(r"\+?\(?\d{1,3}\)?[-.\s]?\(?\d{1,4}\)?[-.\s]?\d{3}[-.\s]?\d{4}|\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}")
    return email, phone
|
34 |
|
35 |
+
# Extract skills using NER model or regex
def extract_skills(resume_text):
    """Return a comma-separated string of skill entities, or "Not Available".

    NOTE(review): dslim/bert-base-NER is trained on PER/ORG/LOC/MISC labels,
    so an entity_group of 'SKILL' may never appear — confirm model choice.
    """
    extracted = []
    for entity in skill_extractor(resume_text):
        if entity['entity_group'] == 'SKILL':
            extracted.append(entity['word'])
    if extracted:
        return ", ".join(extracted)
    return "Not Available"
|
40 |
|
41 |
+
# Extract education details using NER model
|
42 |
def extract_education(resume_text):
|
43 |
ner_results = education_extractor(resume_text)
|
44 |
education_entities = [entity['word'] for entity in ner_results if entity['entity_group'] == 'EDUCATION']
|
45 |
|
46 |
+
# If no direct education found, use regex patterns to capture education
|
47 |
if education_entities:
|
48 |
return ", ".join(education_entities)
|
49 |
else:
|
|
|
50 |
edu_patterns = [
|
51 |
+
r"(Bachelor of .+|Master of .+|PhD|BSc|MSc|MBA|B.A|M.A|B.Tech|M.Tech)",
|
52 |
r"(University of [A-Za-z]+.*)"
|
53 |
]
|
54 |
education = []
|
|
|
58 |
|
59 |
return ", ".join(education) if education else "Not Available"
|
60 |
|
61 |
+
# Calculate match percentage between resume and job description
def calculate_match_percentage(resume_text, job_description):
    """Score resume/job-description similarity on a 0-100 scale.

    Vectorizes both texts with TF-IDF (English stop words removed) and
    returns their cosine similarity scaled to a percentage.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    vectors = vectorizer.fit_transform([resume_text, job_description])
    similarity = cosine_similarity(vectors[0:1], vectors[1:2])[0][0]
    return similarity * 100  # Return percentage match
|
68 |
|
69 |
+
# Generate resume summary using Gemini API
def generate_resume_summary(resume_text):
    """Ask the Gemini model for a summary of *resume_text* and return it stripped."""
    gemini = genai.GenerativeModel('gemini-1.5-flash')
    reply = gemini.generate_content(
        f"Summarize the resume below in 5 bullet points:\n\n{resume_text}"
    )
    return reply.text.strip()
|
75 |
|
76 |
+
# Main function to process and analyze the resume and job description
def analyze_resume_and_job(resume_pdf, job_description):
    """Run the full analysis pipeline and return a one-row results DataFrame.

    Extracts contact info, skills and education from the resume PDF,
    scores it against *job_description*, and summarizes it via Gemini.
    """
    text = input_pdf_text(resume_pdf)
    email, phone = extract_contact_info(text)

    # Assemble every extracted/computed field; keys become DataFrame columns.
    row = {
        'Email': email,
        'Phone': phone,
        'Skills': extract_skills(text),
        'Education': extract_education(text),
        'Match_Percentage': calculate_match_percentage(text, job_description),
        'Resume_Summary': generate_resume_summary(text),
    }
    return pd.DataFrame([row])
|
105 |
+
|
106 |
+
# Example usage — guarded so the analysis (PDF read + API calls) only runs
# when this file is executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    resume_pdf = 'path_to_resume.pdf'  # Path to the uploaded resume PDF
    job_description = '''Insert job description here'''  # Job description text
    df = analyze_resume_and_job(resume_pdf, job_description)
    print(df)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|