# Interview Preparation with AI — Streamlit application.
# (Removed non-code page residue: "Spaces: / Sleeping / Sleeping".)
import streamlit as st | |
import PyPDF2 | |
from docx import Document | |
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM | |
import spacy | |
import pytextrank | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
from langchain_core.messages import SystemMessage | |
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder | |
from langchain.memory import ConversationBufferMemory | |
from langchain_core.output_parsers import StrOutputParser | |
from langchain_core.runnables import RunnablePassthrough, RunnableLambda | |
import spacy | |
import subprocess | |
def ensure_spacy_model(model_name="en_core_web_sm"):
    """Ensure the given spaCy model is installed, downloading it if needed.

    Args:
        model_name: Name of the spaCy model package (default "en_core_web_sm").

    Returns:
        The loaded spaCy ``Language`` pipeline (callers that ignore the
        return value behave exactly as before).

    Raises:
        subprocess.CalledProcessError: If the model download fails.
        OSError: If the model still cannot be loaded after the download.
    """
    import sys  # local import: only needed for the download fallback path

    try:
        return spacy.load(model_name)
    except OSError:
        # Model not installed. Download it with the *current* interpreter
        # (sys.executable) rather than whatever "python" happens to be on
        # PATH, and fail loudly (check=True) if the download does not
        # succeed instead of silently retrying the load.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", model_name],
            check=True,
        )
        return spacy.load(model_name)
def extract_text_from_pdf(uploaded_file):
    """Extract all page text from an uploaded PDF file.

    Args:
        uploaded_file: A file-like object containing PDF data.

    Returns:
        The concatenated text of every page. Pages with no extractable
        text layer (e.g. scanned images) contribute an empty string
        instead of raising a TypeError.
    """
    reader = PyPDF2.PdfReader(uploaded_file)
    # PyPDF2's extract_text() may return None for image-only pages;
    # coalesce to "" so the join never sees a non-string. join also
    # avoids the quadratic `text += ...` accumulation.
    return "".join(page.extract_text() or "" for page in reader.pages)
def extract_text_from_word(uploaded_file):
    """Extract the text of every paragraph from an uploaded .docx file.

    Args:
        uploaded_file: A file-like object containing a Word (.docx) document.

    Returns:
        Paragraph texts joined by newlines, with a trailing newline —
        byte-identical to the original per-paragraph `+= text + "\\n"`
        accumulation ("" for a document with no paragraphs).
    """
    doc = Document(uploaded_file)
    paragraphs = [paragraph.text for paragraph in doc.paragraphs]
    # join is O(n) versus the original quadratic string concatenation.
    return "\n".join(paragraphs) + "\n" if paragraphs else ""
def summarize_text(text, max_length=1000, min_length=30):
    """Summarize *text* with the facebook/bart-large-cnn pipeline.

    Args:
        text: The text to summarize.
        max_length: Maximum summary length in tokens (hard-capped at 1000).
        min_length: Minimum summary length in tokens.

    Returns:
        The summary string, or an "Error in summarization: ..." message
        string on any failure (callers display this verbatim, so the
        best-effort contract is preserved).
    """
    max_length = min(max_length, 1000)  # Ensure max_length doesn't exceed 1000
    try:
        # The HF pipeline is expensive to construct; build it once and
        # cache it on the function object so repeated Streamlit reruns
        # (which re-execute this call) reuse the loaded model.
        if not hasattr(summarize_text, "_summarizer"):
            summarize_text._summarizer = pipeline(
                "summarization", model="facebook/bart-large-cnn"
            )
        summary = summarize_text._summarizer(
            text, max_length=max_length, min_length=min_length, do_sample=False
        )
        if isinstance(summary, list) and len(summary) > 0:
            return summary[0]['summary_text']
        raise ValueError("Unexpected summarizer output format")
    except Exception as e:
        # Report rather than crash — the UI shows this string.
        return f"Error in summarization: {e}"
def extract_keywords(text, top_n=15):
    """Extract the top-ranked key phrases from *text* via PyTextRank.

    Args:
        text: Document text to analyse.
        top_n: Maximum number of key phrases to return.

    Returns:
        A list of up to ``top_n`` key-phrase strings, best-ranked first.
    """
    # Build the spaCy pipeline once and cache it on the function object:
    # reloading the model on every call is slow, and calling
    # add_pipe("textrank") on a pipeline that already has the component
    # raises ValueError — guard with pipe_names.
    if not hasattr(extract_keywords, "_nlp"):
        ensure_spacy_model("en_core_web_sm")
        nlp = spacy.load("en_core_web_sm")
        if "textrank" not in nlp.pipe_names:
            nlp.add_pipe("textrank", last=True)
        extract_keywords._nlp = nlp
    doc = extract_keywords._nlp(text)
    # doc._.phrases is provided by the PyTextRank component, ranked best-first.
    return [phrase.text for phrase in doc._.phrases[:top_n]]
def initialize_chat_model():
    """Build the Gemini chat model used by the ATS-scoring chain.

    Reads the API key from ``key.txt`` when present (unchanged behavior
    for existing deployments); falls back to the ``GOOGLE_API_KEY``
    environment variable when the file is missing.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` instance.

    Raises:
        RuntimeError: If no API key is found in either location.
    """
    import os  # local import: only needed for the env-var fallback

    try:
        with open("key.txt", "r") as f:
            google_api_key = f.read().strip()
    except FileNotFoundError:
        # Previously an unhandled FileNotFoundError crashed the app at
        # import time; prefer a clear, actionable failure instead.
        google_api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not google_api_key:
        raise RuntimeError(
            "No Google API key found: provide key.txt or set GOOGLE_API_KEY"
        )
    return ChatGoogleGenerativeAI(
        google_api_key=google_api_key,
        model="gemini-1.5-pro-latest",
        temperature=0.4,
        max_tokens=2000,
        timeout=120,
        max_retries=5,
        top_p=0.9,
        top_k=40,
        presence_penalty=0.6,
        frequency_penalty=0.3,
    )
chat_model = initialize_chat_model()

# Prompt layout: a strict instruction-following system message, followed by
# the running conversation history, followed by the current human input.
chat_prompt_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content=""" You are a language model designed to follow user instructions exactly as given.
Do not take any actions or provide any information unless specifically directed by the user.
Your role is to fulfill the user's requests precisely without deviating from the instructions provided."""
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        HumanMessagePromptTemplate.from_template("{human_input}"),
    ]
)

# Conversation memory; memory_key matches the placeholder name above.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Reduce the model's chat message to a plain string.
output_parser = StrOutputParser()

# Pipeline: inject chat_history from memory into the input dict, render the
# prompt, invoke the model, and parse the reply down to text.
chain = (
    RunnablePassthrough.assign(
        chat_history=RunnableLambda(
            lambda human_input: memory.load_memory_variables(human_input)["chat_history"]
        )
    )
    | chat_prompt_template
    | chat_model
    | output_parser
)
# ---------------------------------------------------------------------------
# Streamlit app
# ---------------------------------------------------------------------------
st.title("Interview Preparation with AI")
st.markdown("## Part-1: Upload Files, Summarize, and Extract Keywords")

# File upload section
file1 = st.file_uploader("Upload your resume (PDF or DOCX):", type=["pdf", "docx"])
file2 = st.file_uploader("Upload the job description (PDF or DOCX):", type=["pdf", "docx"])

if file1 and file2:
    try:
        # Detect file type and extract text for file 1 (the resume).
        if file1.name.endswith('.pdf'):
            text1 = extract_text_from_pdf(file1)
        elif file1.name.endswith('.docx'):
            text1 = extract_text_from_word(file1)
        else:
            # Fix: previously this fell through with text1 undefined,
            # causing a NameError below. Halt the script run instead.
            st.error("Unsupported file type for file 1")
            st.stop()

        # Detect file type and extract text for file 2 (the job description).
        if file2.name.endswith('.pdf'):
            text2 = extract_text_from_pdf(file2)
        elif file2.name.endswith('.docx'):
            text2 = extract_text_from_word(file2)
        else:
            st.error("Unsupported file type for file 2")
            st.stop()

        # Summarize texts
        #st.markdown("### Summarizing the uploaded documents...")
        #summary1 = summarize_text(text1)
        #summary2 = summarize_text(text2)
        #st.markdown("### Results for File 1 (Resume)")
        #st.subheader("Summary:")
        #st.write(summary1)
        #st.markdown("### Results for File 2 (Job Description)")
        #st.subheader("Summary:")
        #st.write(summary2)

        # Session-state flags let button results survive Streamlit reruns
        # (each widget interaction re-executes the whole script).
        if "keywords_extracted" not in st.session_state:
            st.session_state.keywords_extracted = False
        if "ats_score_calculated" not in st.session_state:
            st.session_state.ats_score_calculated = False

        # Button to Extract Keywords
        if st.button("Extract Keywords") or st.session_state.keywords_extracted:
            st.session_state.keywords_extracted = True
            st.markdown("### Extracting keywords...")
            keywords1 = extract_keywords(text1)
            keywords2 = extract_keywords(text2)

            # Display Keywords
            st.markdown("### Results for File 1 (Resume)")
            st.subheader("Keywords:")
            st.write(", ".join(keywords1))
            st.markdown("### Results for File 2 (Job Description)")
            st.subheader("Keywords:")
            st.write(", ".join(keywords2))

            # Button to Calculate ATS Score — nested inside the keywords
            # branch so keywords1/keywords2 are guaranteed to be defined.
            if st.button("ATS Score") or st.session_state.ats_score_calculated:
                st.session_state.ats_score_calculated = True
                resume_keywords = set(keywords1)
                job_description_keywords = set(keywords2)
                st.markdown("### ATS Score Calculation")
                query = {"human_input": f"""
"Your task is to act as a highly advanced Applicant Tracking System (ATS) that evaluates the compatibility of a candidate's resume with a given job description. You will meticulously extract and analyze all relevant keywords and information from both the resume and the job description, including but not limited to Role-Specific Keywords, Technical Skills, Certifications, Experience, Soft Skills, Job Responsibilities, Industry Keywords, Methodologies and Practices, Keywords Indicating Preferences, and Core Values.
You will then calculate an ATS score on a scale of 0-100, reflecting how well the resume matches the job description. The score should be based on the following criteria:
Keywords Matching (20%): The extent to which the resume contains the exact keywords and phrases mentioned in the job description.
Skills and Competencies (20%): The presence and relevance of skills and competencies that align with the job requirements.
Formatting (10%): The clarity and simplicity of the resume format, ensuring that the ATS can easily parse the information.
Job Title Match (10%): The similarity between the candidate's previous job titles and the job title in the description.
Experience and Education (20%): Whether the candidate's experience level and education meet the job requirements.
Customization (20%): How well the resume is tailored to the specific job description, including the use of industry-specific language and terminology.
For each criterion, provide a detailed breakdown of the match percentage, highlighting where the candidate meets the requirements and where there are gaps. Finally, provide an overall ATS score and a summary of the candidate's strengths and areas for improvement.
Ensure that the evaluation is done in real-time and with 100% accuracy, taking into account all possible factors that a traditional ATS would consider."
Job Description Keywords:
{list(job_description_keywords)}
Resume Keywords:
{list(resume_keywords)}
"""}
                response = chain.invoke(query)
                # Persist the exchange so future turns see the history.
                memory.save_context(query, {"output": response})
                st.write(response)
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        st.error(f"An error occurred: {e}")
else:
    st.info("Please upload both files to proceed.")