# AI Resume and Job Description Analyzer — Streamlit app.
# --- Imports and model setup ----------------------------------------------
import subprocess  # NOTE(review): unused here — kept in case another chunk of the file needs it

import streamlit as st

import nltk
import PyPDF2
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

# Gemini Flash is a project-local/optional dependency ("Adjust if Gemini Flash
# is available" in the original). Fall back to a pass-through stub so the app
# still runs — refine_prompt() then simply returns the prompt unchanged.
try:
    from gemini_flash import GeminiFlash
except ImportError:
    class GeminiFlash:
        """Fallback used when the real Gemini Flash package is unavailable."""

        def refine_prompt(self, prompt):
            # Pass-through: no refinement without the real backend.
            return prompt

# Ensure NLTK's tokenizer models and stop-word lists are available locally.
nltk.download('punkt')
nltk.download('stopwords')

# Hugging Face NER pipeline used to extract named entities from resume text.
ner_model = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

# Gemini Flash instance used to refine prompts before they reach an LLM.
prompt_engineer = GeminiFlash()
# --- Streamlit interface ---------------------------------------------------
st.title("AI Resume and Job Description Analyzer")

# Step 1: Resume upload.
# resume_text is initialized to "" so the later steps (3-5) can safely test it
# when no PDF has been uploaded yet — the original referenced it before
# assignment in that case, raising NameError.
resume_text = ""
uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")
if uploaded_file is not None:
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    # extract_text() may return None for image-only pages; coerce to "".
    resume_text = "".join(
        (page.extract_text() or "") for page in pdf_reader.pages
    )
    st.text_area("Resume Text", resume_text, height=300)

# Step 2: Job description input, then NLTK preprocessing
# (tokenize -> drop English stop words -> Porter-stem).
job_description = st.text_area("Enter Job Description")
if job_description:
    stop_words = set(stopwords.words("english"))
    tokens = word_tokenize(job_description)
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    ps = nltk.PorterStemmer()
    stemmed_tokens = [ps.stem(word) for word in filtered_tokens]
    preprocessed_job_description = " ".join(stemmed_tokens)
    st.text_area("Processed Job Description", preprocessed_job_description)

# Step 3: Named Entity Recognition (NER) on the resume via Hugging Face.
if resume_text:
    entities = ner_model(resume_text)
    st.subheader("Named Entities from Resume")
    st.write(entities)

# Step 4: Candidate-job relevance via TF-IDF + cosine similarity.
if resume_text and job_description:
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_description, resume_text])
    # Row 0 = job description, row 1 = resume; compare the two vectors.
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    st.write(f"Cosine Similarity (Relevance): {cosine_sim[0][0]:.2f}")

# Step 5: Gemini Flash prompt engineering for a downstream LLM.
if resume_text and job_description:
    prompt = f"""
Given the resume text and job description, evaluate how well the candidate's qualifications match the job requirements.
Resume: {resume_text}
Job Description: {job_description}
Based on the information provided, generate a detailed match score between the candidate and the job.
"""
    # Refine/enhance the raw prompt before it is sent to an LLM.
    enhanced_prompt = prompt_engineer.refine_prompt(prompt)
    # Shown for debugging/transparency.
    st.subheader("Enhanced Prompt for LLM")
    st.write(enhanced_prompt)
    # Typically `enhanced_prompt` would now be passed to an LLM API, e.g.:
    # response = get_llm_response(enhanced_prompt)
    # st.write("LLM Response:", response)