# Resume_Parser / Home.py
import os
import shutil
import zipfile
import streamlit as st
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.document_loaders import UnstructuredPDFLoader
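# NOTE: assumed runtime dependencies (not pinned here): streamlit, pydantic, langchain,
# langchain-core, langchain-huggingface, langchain-community, and unstructured with its
# PDF extras (needed by UnstructuredPDFLoader).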
# --- CSS ---
st.markdown(
    """
    <style>
    .stApp {
        background-color: midnightblue;
        color: white;
    }
    </style>
    """,
    unsafe_allow_html=True
)
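# The Hugging Face access token is read from Streamlit secrets; it must be provided as
# HF_TOKEN (e.g. in .streamlit/secrets.toml when running locally, or via the host's
# secrets mechanism when deployed).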
newhf = st.secrets["HF_TOKEN"]
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
EXTRACTED_FOLDER = os.path.join(BASE_DIR, "extracted")
SELECTED_FOLDER = os.path.join(BASE_DIR, "selected")
os.makedirs(EXTRACTED_FOLDER, exist_ok=True)
os.makedirs(SELECTED_FOLDER, exist_ok=True)
# --- Set up the LLM using HuggingFaceEndpoint ---
llm_model = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    provider="nebius",
    huggingfacehub_api_token=newhf,  # documented auth parameter for HuggingFaceEndpoint
    temperature=0.7,
    max_new_tokens=100,  # caps the JSON reply; raise if the output gets truncated
    task="conversational"
)
# ChatHuggingFace wraps the endpoint for chat-style invocation; generation settings live on llm_model.
llm = ChatHuggingFace(llm=llm_model)
# --- Pydantic model for LLM output ---
class JobDesc(BaseModel):
    Objective: str = Field(description="from the given resume extract the Objective")
    Name: str = Field(description="from the given resume extract the Name")
    Age: int = Field(description="from the given resume extract the Age")
    Qualification: str = Field(description="from the given resume extract the Qualification")
    Skills: list[str] = Field(description="from the given resume extract the Skills")
    Experience: list[str] = Field(description="from the given resume extract the work Experience")
parser = PydanticOutputParser(pydantic_object=JobDesc)
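# parser.get_format_instructions() produces the JSON-schema instructions for JobDesc;
# it is injected into the prompt below as {instruction} so the LLM returns parseable JSON.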
prompt = PromptTemplate(
    template="""
Extract the following fields from the resume text and return them as a JSON object.

Resume:
{input}

Return JSON matching this format:
{instruction}
""",
    input_variables=["input", "instruction"]
)
st.title("📄 Resume Screening Application")
uploaded_file = st.file_uploader("Upload ZIP file of resumes", type=["zip"])
parsed_resumes = []
unique_skills = set()
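# --- Main flow: clean the workspace, extract the uploaded ZIP, then parse each PDF resume ---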
if uploaded_file:
    # --- Clean extracted folder ---
    if os.path.exists(EXTRACTED_FOLDER):
        shutil.rmtree(EXTRACTED_FOLDER)
    os.makedirs(EXTRACTED_FOLDER, exist_ok=True)

    # --- Save uploaded ZIP ---
    zip_path = os.path.join(BASE_DIR, "temp.zip")
    with open(zip_path, "wb") as f:
        f.write(uploaded_file.read())

    # --- Extract ZIP ---
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(EXTRACTED_FOLDER)
        st.success(f"✅ Extracted ZIP to: {EXTRACTED_FOLDER}")
    except Exception as e:
        st.error(f"❌ Failed to extract ZIP: {e}")

    # --- Clean up zip ---
    os.remove(zip_path)

    # --- List extracted files ---
    extracted_files = os.listdir(EXTRACTED_FOLDER)
    st.write("🗂️ Files extracted:")
    st.write(extracted_files)

    # --- Process PDFs ---
    for filename in extracted_files:
        if filename.lower().endswith(".pdf"):
            file_path = os.path.join(EXTRACTED_FOLDER, filename)
            st.write(f"Processing file: {file_path} | Exists? {os.path.exists(file_path)}")
            try:
                loader = UnstructuredPDFLoader(file_path, mode="paged")
                data = loader.load()

                # Reject resumes longer than two pages.
                if len(data) > 2:
                    st.warning(f"❌ Rejected {filename}: More than 2 pages")
                    continue

                full_text = "\n".join([page.page_content for page in data])
                formatted_prompt = prompt.format(
                    input=full_text,
                    instruction=parser.get_format_instructions()
                )
                result = llm.invoke(formatted_prompt)
                parsed = parser.parse(result.content)

                resume_data = parsed.model_dump()
                resume_data["file_path"] = file_path  # save path to extracted PDF
                parsed_resumes.append(resume_data)

                for skill in parsed.Skills:
                    unique_skills.add(skill.strip())

                st.success(f"✅ Parsed: {parsed.Name}")
                st.write(f"🗂️ Resume path saved: {file_path}")
            except Exception as e:
                st.error(f"❌ Failed to parse {filename}: {e}")
# --- Skill categories ---
skill_categories = {
"Programming Languages": ["Python"],
"Data Analysis & Visualisation": ["Pandas", "Numpy", "Excel", "Matplotlib", "Seaborn"],
"Database Management": ["SQL", "Power BI"],
"Deep Learning": ["ANN", "CNN", "RNN"],
"Machine Learning": ["Scikit-learn", "OpenCV", "NLP", "Supervised learning", "Optuna", "Descriptive Statistics"],
"Generative AI": ["Langchain", "LLMs"]
}
# --- Skill Selection ---
if parsed_resumes:
    selected_categories = st.multiselect("Select required skill categories", list(skill_categories.keys()))

    if st.button("Evaluate Resumes"):
        required_keywords = set()
        for category in selected_categories:
            required_keywords.update(skill_categories[category])

        for resume in parsed_resumes:
            # Select a resume if any required skill matches one of its parsed skills (case-insensitive).
            if any(req_skill.lower() in (skill.lower() for skill in resume["Skills"]) for req_skill in required_keywords):
                st.success(f"✅ Selected: {resume['Name']}")
                source_path = resume["file_path"]
                dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
                if os.path.exists(source_path):
                    shutil.copy(source_path, dest_path)
                    st.info(f"📁 File copied to selected: {os.path.basename(source_path)}")
                else:
                    st.error(f"❌ Could not find file to copy: {source_path}")
            else:
                st.warning(f"❌ Rejected: {resume['Name']}")
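# To run locally (a minimal sketch, assuming the dependencies listed above are installed
# and HF_TOKEN is configured in Streamlit secrets):
#   streamlit run Home.py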