import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import PyPDF2
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from transformers import pipeline
from gemini_flash import GeminiFlash  # Placeholder: assumed local wrapper module for Gemini Flash (a sketch of its contents appears below); adjust to your own client

# Ensure that NLTK's tokenizer and stopword data are available
nltk.download('punkt')
nltk.download('punkt_tab')  # required by word_tokenize on newer NLTK releases
nltk.download('stopwords')

# Initialize the Hugging Face NER pipeline (grouped entities are easier to read than raw token tags)
ner_model = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)

# Initialize Gemini Flash for prompt engineering
prompt_engineer = GeminiFlash()
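
# The `gemini_flash` module imported above is assumed, not a published package. A minimal
# sketch of what such a `gemini_flash.py` wrapper might contain, using the
# `google-generativeai` SDK (itself an assumption here) with a GOOGLE_API_KEY environment
# variable, is shown for reference:
#
#   import os
#   import google.generativeai as genai
#
#   class GeminiFlash:
#       def __init__(self, model_name="gemini-1.5-flash"):
#           genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
#           self.model = genai.GenerativeModel(model_name)
#
#       def refine_prompt(self, prompt: str) -> str:
#           # Ask the model to rewrite the prompt so it is clearer and more specific
#           instruction = "Rewrite the following prompt to be clearer and more specific:\n\n"
#           return self.model.generate_content(instruction + prompt).text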

# Streamlit Interface
st.title("AI Resume and Job Description Analyzer")

# Step 1: Resume Upload
uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")

resume_text = ""  # initialized up front so later steps can safely check whether a resume was provided
if uploaded_file is not None:
    # Extract text from every page of the PDF
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    for page in pdf_reader.pages:
        resume_text += page.extract_text() or ""  # extract_text() can return None for image-only pages

    # Display the resume text
    st.text_area("Resume Text", resume_text, height=300)

# Step 2: Job Description Input
job_description = st.text_area("Enter Job Description")

if job_description:
    # Preprocess job description using NLTK
    stop_words = set(stopwords.words("english"))
    tokens = word_tokenize(job_description)
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    ps = PorterStemmer()
    stemmed_tokens = [ps.stem(word) for word in filtered_tokens]
    preprocessed_job_description = " ".join(stemmed_tokens)

    # Display preprocessed job description
    st.text_area("Processed Job Description", preprocessed_job_description)

# Step 3: Named Entity Recognition (NER) on Resume using Hugging Face Transformers
if resume_text:
    # Note: BERT-based NER models are limited to 512 tokens, so very long resumes may need to be chunked
    entities = ner_model(resume_text)

    # Display extracted entities
    st.subheader("Named Entities from Resume")
    st.write(entities)

# Step 4: Candidate-Job Relevance Using Cosine Similarity
if resume_text and job_description:
    # TF-IDF is fit on the raw texts (the vectorizer does its own tokenization);
    # the preprocessed job description above is used for display only
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_description, resume_text])
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])

    st.write(f"Cosine Similarity (Relevance): {cosine_sim[0][0]:.2f}")

# Step 5: Gemini Flash - Prompt Engineering (using Gemini Flash to craft relevant prompts for an LLM)
if resume_text and job_description:
    prompt = f"""
    Given the resume text and job description, evaluate how well the candidate's qualifications match the job requirements.
    
    Resume: {resume_text}
    Job Description: {job_description}
    
    Based on the information provided, generate a detailed match score between the candidate and the job.
    """
    
    # Use Gemini Flash to refine and enhance the prompt (refine_prompt is assumed to be provided by the gemini_flash wrapper)
    enhanced_prompt = prompt_engineer.refine_prompt(prompt)
    
    # Display the enhanced prompt (for debugging or transparency)
    st.subheader("Enhanced Prompt for LLM")
    st.write(enhanced_prompt)

    # Here you would typically pass `enhanced_prompt` to a large language model (LLM) API for evaluation.
    # Assuming a helper `get_llm_response` exists (a sketch is provided below), the call would be:
    # response = get_llm_response(enhanced_prompt)
    # st.write("LLM Response:", response)
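
# Minimal sketch of `get_llm_response`, assuming the `google-generativeai` package is
# installed and a GOOGLE_API_KEY environment variable is set. The helper itself and the
# "gemini-1.5-flash" model name are illustrative assumptions, not part of the original
# app; adapt them to whichever LLM client you actually use.
def get_llm_response(prompt: str) -> str:
    import os
    import google.generativeai as genai  # imported lazily so the app still runs without the package

    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(prompt)
    return response.text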