import os
import shutil
import zipfile
import streamlit as st
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.document_loaders import UnstructuredPDFLoader
# --- CSS ---
st.markdown(
    """
    <style>
    .stApp {
        background-color: midnightblue;
        color: white;
    }
    </style>
    """,
    unsafe_allow_html=True
)
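# The Hugging Face token is read from Streamlit secrets: set HF_TOKEN in
# .streamlit/secrets.toml locally, or in the Space's secrets settings.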
newhf = st.secrets["HF_TOKEN"]
BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # needed below for zip_path
EXTRACTED_FOLDER = os.path.join(BASE_DIR, "extracted")
SELECTED_FOLDER = os.path.join(BASE_DIR, "selected")
os.makedirs(EXTRACTED_FOLDER, exist_ok=True)
os.makedirs(SELECTED_FOLDER, exist_ok=True)
# --- Set up selected LLM using HuggingFaceEndpoint ---
llm_model = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    provider="nebius",
    huggingfacehub_api_token=newhf,  # HuggingFaceEndpoint takes the token via this field, not api_key
    temperature=0.7,
    max_new_tokens=100,
    task="conversational"
)
# ChatHuggingFace only needs the endpoint; repeating repo_id, provider,
# token, and sampling parameters here is unnecessary.
llm = ChatHuggingFace(llm=llm_model)
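# ChatHuggingFace wraps the raw endpoint and applies the model's chat template,
# so llm.invoke(...) accepts a plain string (or a list of messages) and
# returns an AIMessage whose .content holds the model's reply.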
# --- Pydantic model for LLM output ---
# (The name JobDesc is kept from the original; the fields describe a resume.)
class JobDesc(BaseModel):
    Objective: str = Field(description="the candidate's objective, extracted from the resume")
    Name: str = Field(description="the candidate's name, extracted from the resume")
    Age: int = Field(description="the candidate's age, extracted from the resume")
    Qualification: str = Field(description="the candidate's qualification, extracted from the resume")
    Skills: list[str] = Field(description="the candidate's skills, extracted from the resume")
    Experience: list[str] = Field(description="the candidate's work experience, extracted from the resume")

parser = PydanticOutputParser(pydantic_object=JobDesc)
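# parser.get_format_instructions() renders the JobDesc JSON schema as text
# that is injected into the prompt below; parser.parse() later validates the
# model's reply against that same schema.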
prompt = PromptTemplate(
    input_variables=["input", "instruction"],
    template="""
Extract the following fields from the resume text and return them as a JSON object.
Resume:
{input}
Return JSON matching this format:
{instruction}
"""
)
st.title("📄 Resume Screening Application")
uploaded_file = st.file_uploader("Upload ZIP file of resumes", type=["zip"])
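# Accumulators: parsed_resumes collects one dict per successfully parsed PDF,
# unique_skills every distinct skill string seen across all resumes.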
parsed_resumes = []
unique_skills = set()
if uploaded_file:
    # --- Clean extracted folder ---
    if os.path.exists(EXTRACTED_FOLDER):
        shutil.rmtree(EXTRACTED_FOLDER)
    os.makedirs(EXTRACTED_FOLDER, exist_ok=True)

    # --- Save uploaded ZIP ---
    zip_path = os.path.join(BASE_DIR, "temp.zip")
    with open(zip_path, "wb") as f:
        f.write(uploaded_file.read())
    # --- Extract ZIP ---
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(EXTRACTED_FOLDER)
        st.success(f"✅ Extracted ZIP to: {EXTRACTED_FOLDER}")
    except Exception as e:
        st.error(f"❌ Failed to extract ZIP: {e}")
        st.stop()  # no point continuing if extraction failed

    # --- Clean up zip ---
    os.remove(zip_path)
    # --- List extracted files ---
    extracted_files = os.listdir(EXTRACTED_FOLDER)
    st.write("🗂️ Files extracted:")
    st.write(extracted_files)
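    # Note: only files at the top level of the ZIP are scanned, so resumes
    # nested in subfolders are skipped. PDFs longer than 2 pages are rejected
    # before the LLM is ever called.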
    # --- Process PDFs ---
    for filename in extracted_files:
        if filename.lower().endswith(".pdf"):
            file_path = os.path.join(EXTRACTED_FOLDER, filename)
            st.write(f"Processing file: {file_path} | Exists? {os.path.exists(file_path)}")
            try:
                # mode="paged" yields one Document per page, so len(data) is the page count
                loader = UnstructuredPDFLoader(file_path, mode="paged")
                data = loader.load()
                if len(data) > 2:
                    st.warning(f"❌ Rejected {filename}: More than 2 pages")
                    continue
                full_text = "\n".join([page.page_content for page in data])
                formatted_prompt = prompt.format(
                    input=full_text,
                    instruction=parser.get_format_instructions()
                )
                result = llm.invoke(formatted_prompt)
                parsed = parser.parse(result.content)
                resume_data = parsed.model_dump()  # .dict() is deprecated in Pydantic v2
                resume_data["file_path"] = file_path  # save path to extracted PDF
                parsed_resumes.append(resume_data)
                for skill in parsed.Skills:
                    unique_skills.add(skill.strip())
                st.success(f"✅ Parsed: {parsed.Name}")
                st.write(f"🗂️ Resume path saved: {file_path}")
            except Exception as e:
                st.error(f"❌ Failed to parse {filename}: {e}")
# --- Skill categories ---
skill_categories = {
    "Programming Languages": ["Python"],
    "Data Analysis & Visualisation": ["Pandas", "Numpy", "Excel", "Matplotlib", "Seaborn"],
    "Database Management": ["SQL", "Power BI"],
    "Deep Learning": ["ANN", "CNN", "RNN"],
    "Machine Learning": ["Scikit-learn", "OpenCV", "NLP", "Supervised learning", "Optuna", "Descriptive Statistics"],
    "Generative AI": ["Langchain", "LLMs"]
}
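# Matching below is exact, case-insensitive equality: a resume is selected if
# at least one of its extracted skills equals a keyword from the chosen
# categories (so "scikit-learn" matches "Scikit-learn", but "sklearn" would not).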
# --- Skill Selection ---
if parsed_resumes:
    selected_categories = st.multiselect("Select required skill categories", list(skill_categories.keys()))
    if st.button("Evaluate Resumes"):
        required_keywords = set()
        for category in selected_categories:
            required_keywords.update(skill_categories[category])
        for resume in parsed_resumes:
            resume_skills = {skill.lower() for skill in resume["Skills"]}
            if any(keyword.lower() in resume_skills for keyword in required_keywords):
                st.success(f"✅ Selected: {resume['Name']}")
                source_path = resume["file_path"]
                dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
                if os.path.exists(source_path):
                    shutil.copy(source_path, dest_path)
                    st.info(f"📁 Copied to selected folder: {os.path.basename(source_path)}")
                else:
                    st.error(f"❌ Could not find file to copy: {source_path}")
            else:
                st.warning(f"❌ Rejected: {resume['Name']}")
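# To run locally (the file name app.py is illustrative), with streamlit,
# langchain, langchain-huggingface, langchain-community, unstructured[pdf],
# and pydantic installed:
#   streamlit run app.py
# HF_TOKEN is expected in .streamlit/secrets.toml, e.g.:
#   HF_TOKEN = "hf_..."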