# AI Resume and Job Description Analyzer (Streamlit script).
#
# Flow, re-executed top to bottom by Streamlit on every user interaction:
#   1. Read an uploaded PDF resume and show its extracted text.
#   2. Read a job description, then show a stop-word-filtered, stemmed version.
#   3. Run a Hugging Face NER pipeline over the resume text.
#   4. Report the TF-IDF cosine similarity between job description and resume.
#   5. Build an evaluation prompt and pass it through GeminiFlash.refine_prompt.
import subprocess

import nltk
import PyPDF2
import streamlit as st
from gemini_flash import GeminiFlash  # Adjust if Gemini Flash is available
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

# Ensure that NLTK's tokenizer data and stopwords are available
nltk.download('punkt')
nltk.download('stopwords')


@st.cache_resource
def _load_ner_model():
    """Build the Hugging Face NER pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be re-instantiated each time.
    """
    return pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")


def _extract_pdf_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    A page for which ``extract_text()`` yields nothing contributes an empty
    string, so the concatenation never fails on a non-string value.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)


def _preprocess_text(text):
    """Tokenize *text*, drop English stop words, and Porter-stem the rest.

    Returns the stemmed tokens joined by single spaces.
    """
    stop_words = set(stopwords.words("english"))
    stemmer = nltk.PorterStemmer()
    return " ".join(
        stemmer.stem(token)
        for token in word_tokenize(text)
        if token.lower() not in stop_words
    )


# Initialize Hugging Face NER pipeline
ner_model = _load_ner_model()

# Initialize Gemini Flash for prompt engineering
prompt_engineer = GeminiFlash()

# Streamlit Interface
st.title("AI Resume and Job Description Analyzer")

# Step 1: Resume Upload
uploaded_file = st.file_uploader("Upload Resume (PDF)", type="pdf")

# Bind resume_text up front so the later steps can test it safely even when
# no file has been uploaded yet (otherwise they raise NameError).
resume_text = ""
if uploaded_file is not None:
    # Read the PDF file
    resume_text = _extract_pdf_text(uploaded_file)

    # Display the resume text
    st.text_area("Resume Text", resume_text, height=300)

# Step 2: Job Description Input
job_description = st.text_area("Enter Job Description")

if job_description:
    # Preprocess job description using NLTK
    preprocessed_job_description = _preprocess_text(job_description)

    # Display preprocessed job description
    st.text_area("Processed Job Description", preprocessed_job_description)

# Step 3: Named Entity Recognition (NER) on Resume using Hugging Face Transformers
if resume_text:
    # NOTE(review): BERT-style models usually have a maximum input length;
    # very long resumes may need to be chunked before this call - confirm.
    entities = ner_model(resume_text)

    # Display extracted entities
    st.subheader("Named Entities from Resume")
    st.write(entities)

# Step 4: Candidate-Job Relevance Using Cosine Similarity
if resume_text and job_description:
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([job_description, resume_text])
    except ValueError:
        # scikit-learn raises ValueError when neither document contains a
        # usable token (empty vocabulary), e.g. a scanned PDF with no text.
        st.warning("Could not compute similarity: no usable words found in the texts.")
    else:
        # Row 0 is the job description, row 1 is the resume.
        cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
        st.write(f"Cosine Similarity (Relevance): {cosine_sim[0][0]:.2f}")

# Step 5: Gemini Flash - Prompt Engineering (using Gemini Flash to craft relevant prompts for an LLM)
if resume_text and job_description:
    prompt = f"""
    Given the resume text and job description, evaluate how well the candidate's qualifications match the job requirements.

    Resume:
    {resume_text}

    Job Description:
    {job_description}

    Based on the information provided, generate a detailed match score between the candidate and the job.
    """

    # Use Gemini Flash to refine and enhance the prompt (assuming Gemini Flash enhances the prompt)
    enhanced_prompt = prompt_engineer.refine_prompt(prompt)

    # Display the enhanced prompt (for debugging or transparency)
    st.subheader("Enhanced Prompt for LLM")
    st.write(enhanced_prompt)

    # Here you would typically pass the `enhanced_prompt` to a large language model (LLM) API or model for evaluation
    # For demonstration purposes, assume a function `get_llm_response` exists that interacts with a model.
    # response = get_llm_response(enhanced_prompt)
    # st.write("LLM Response:", response)