# Resume_Parser / Home.py
import os
import shutil
import zipfile
import streamlit as st
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.document_loaders import UnstructuredPDFLoader
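# NOTE: assumed runtime dependencies (not pinned here): streamlit, pydantic, langchain,
# langchain-core, langchain-huggingface, langchain-community, and unstructured with its
# PDF extras (needed by UnstructuredPDFLoader).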
# --- CSS ---
st.markdown(
    """
    <style>
    .stApp {
        background-color: midnightblue;
        color: white;
    }
    </style>
    """,
    unsafe_allow_html=True
)
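# The Hugging Face access token is read from Streamlit secrets; it must be provided as
# HF_TOKEN (e.g. in .streamlit/secrets.toml when running locally, or via the host's
# secrets mechanism when deployed).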
newhf = st.secrets["HF_TOKEN"]
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
EXTRACTED_FOLDER = os.path.join(BASE_DIR, "extracted")
SELECTED_FOLDER = os.path.join(BASE_DIR, "selected")
os.makedirs(EXTRACTED_FOLDER, exist_ok=True)
os.makedirs(SELECTED_FOLDER, exist_ok=True)
# --- Set up the LLM using HuggingFaceEndpoint ---
llm_model = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    provider="nebius",
    huggingfacehub_api_token=newhf,  # documented auth parameter for HuggingFaceEndpoint
    temperature=0.7,
    max_new_tokens=100,  # caps the JSON reply; raise if the output gets truncated
    task="conversational"
)
# ChatHuggingFace wraps the endpoint for chat-style invocation; generation settings live on llm_model.
llm = ChatHuggingFace(llm=llm_model)
# --- Pydantic model for LLM output ---
class JobDesc(BaseModel):
    Objective: str = Field(description="from the given resume extract the Objective")
    Name: str = Field(description="from the given resume extract the Name")
    Age: int = Field(description="from the given resume extract the Age")
    Qualification: str = Field(description="from the given resume extract the Qualification")
    Skills: list[str] = Field(description="from the given resume extract the Skills")
    Experience: list[str] = Field(description="from the given resume extract the work Experience")
parser = PydanticOutputParser(pydantic_object=JobDesc)
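# parser.get_format_instructions() produces the JSON-schema instructions for JobDesc;
# it is injected into the prompt below as {instruction} so the LLM returns parseable JSON.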
prompt = PromptTemplate(
    template="""
Extract the following fields from the resume text and return them as a JSON object.

Resume:
{input}

Return JSON matching this format:
{instruction}
""",
    input_variables=["input", "instruction"]
)
st.title("📄 Resume Screening Application")
uploaded_file = st.file_uploader("Upload ZIP file of resumes", type=["zip"])
parsed_resumes = []
unique_skills = set()
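# --- Main flow: clean the workspace, extract the uploaded ZIP, then parse each PDF resume ---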
if uploaded_file:
    # --- Clean extracted folder ---
    if os.path.exists(EXTRACTED_FOLDER):
        shutil.rmtree(EXTRACTED_FOLDER)
    os.makedirs(EXTRACTED_FOLDER, exist_ok=True)

    # --- Save uploaded ZIP ---
    zip_path = os.path.join(BASE_DIR, "temp.zip")
    with open(zip_path, "wb") as f:
        f.write(uploaded_file.read())

    # --- Extract ZIP ---
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(EXTRACTED_FOLDER)
        st.success(f"✅ Extracted ZIP to: {EXTRACTED_FOLDER}")
    except Exception as e:
        st.error(f"❌ Failed to extract ZIP: {e}")

    # --- Clean up zip ---
    os.remove(zip_path)

    # --- List extracted files ---
    extracted_files = os.listdir(EXTRACTED_FOLDER)
    st.write("🗂️ Files extracted:")
    st.write(extracted_files)

    # --- Process PDFs ---
    for filename in extracted_files:
        if filename.lower().endswith(".pdf"):
            file_path = os.path.join(EXTRACTED_FOLDER, filename)
            st.write(f"Processing file: {file_path} | Exists? {os.path.exists(file_path)}")
            try:
                loader = UnstructuredPDFLoader(file_path, mode="paged")
                data = loader.load()

                # Reject resumes longer than two pages.
                if len(data) > 2:
                    st.warning(f"❌ Rejected {filename}: More than 2 pages")
                    continue

                full_text = "\n".join([page.page_content for page in data])
                formatted_prompt = prompt.format(
                    input=full_text,
                    instruction=parser.get_format_instructions()
                )
                result = llm.invoke(formatted_prompt)
                parsed = parser.parse(result.content)

                resume_data = parsed.model_dump()
                resume_data["file_path"] = file_path  # save path to extracted PDF
                parsed_resumes.append(resume_data)

                for skill in parsed.Skills:
                    unique_skills.add(skill.strip())

                st.success(f"✅ Parsed: {parsed.Name}")
                st.write(f"🗂️ Resume path saved: {file_path}")
            except Exception as e:
                st.error(f"❌ Failed to parse {filename}: {e}")
# --- Skill categories ---
skill_categories = {
"Programming Languages": ["Python"],
"Data Analysis & Visualisation": ["Pandas", "Numpy", "Excel", "Matplotlib", "Seaborn"],
"Database Management": ["SQL", "Power BI"],
"Deep Learning": ["ANN", "CNN", "RNN"],
"Machine Learning": ["Scikit-learn", "OpenCV", "NLP", "Supervised learning", "Optuna", "Descriptive Statistics"],
"Generative AI": ["Langchain", "LLMs"]
}
# --- Skill Selection ---
if parsed_resumes:
    selected_categories = st.multiselect("Select required skill categories", list(skill_categories.keys()))

    if st.button("Evaluate Resumes"):
        required_keywords = set()
        for category in selected_categories:
            required_keywords.update(skill_categories[category])

        for resume in parsed_resumes:
            # Select a resume if any required skill matches one of its parsed skills (case-insensitive).
            if any(req_skill.lower() in (skill.lower() for skill in resume["Skills"]) for req_skill in required_keywords):
                st.success(f"✅ Selected: {resume['Name']}")
                source_path = resume["file_path"]
                dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
                if os.path.exists(source_path):
                    shutil.copy(source_path, dest_path)
                    st.info(f"📁 File copied to selected: {os.path.basename(source_path)}")
                else:
                    st.error(f"❌ Could not find file to copy: {source_path}")
            else:
                st.warning(f"❌ Rejected: {resume['Name']}")
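# To run locally (a minimal sketch, assuming the dependencies listed above are installed
# and HF_TOKEN is configured in Streamlit secrets):
#   streamlit run Home.py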