Spaces:

ChaitanyaSubhakar
/

Resume_Parser

Sleeping

App Files Files Community

ChaitanyaSubhakar committed on Jun 1

Commit

b00ce35

verified ·

1 Parent(s): 3b04f33

Update Home.py

Browse files

Files changed (1) hide show

Home.py +65 -98

Home.py CHANGED Viewed

@@ -1,132 +1,99 @@
 import os
 import zipfile
-from pathlib import Path
 import streamlit as st
 from langchain_community.document_loaders import UnstructuredPDFLoader
-from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import PydanticOutputParser
 from pydantic import BaseModel, Field
-st.markdown(
-    """
-    <style>
-    .stApp {
-        background-color: midnightblue;
-        color: white;
-    }
-    </style>
-    """,
-    unsafe_allow_html=True
-)
-# Setup your HF token and model (replace with your token handling)
-newhf = st.secrets["HF_TOKEN"]
-# Create folders if not present
-Path("Extracted_Resumes").mkdir(exist_ok=True)
-Path("Selected_Resumes").mkdir(exist_ok=True)
-# Set up the LLaMA model
 llama_model = HuggingFaceEndpoint(
     repo_id="meta-llama/Llama-3.1-8B-Instruct",
     provider="nebius",
     temperature=0.7,
-    api_key=newhf,
     max_new_tokens=512,
     task="conversational"
 )
-model = ChatHuggingFace(
-    llm=llama_model,
-    repo_id="meta-llama/Llama-3.1-8B-Instruct",
-    provider="nebius",
-    temperature=0.7,
-    api_key=newhf,
-    max_new_tokens=512,
-    task="conversational"
-)
-# Pydantic schema for parsing resume content
 class JobDesc(BaseModel):
-    Objective: str = Field(description="Objective")
-    Name: str = Field(description="Name")
-    Age: int = Field(description="Age")
-    Qualification: str = Field(description="Qualification")
-    Skills: list[str] = Field(description="Skills")
-    Experience: float = Field(description="Work Experience")
 parser = PydanticOutputParser(pydantic_object=JobDesc)
-# Prompt template for extracting fields from resume
-pt = PromptTemplate(template="""
-You are an information extraction expert. Extract the following fields from the given resume text and return ONLY the JSON that matches the format.
 Resume Text:
 {input}
-Return ONLY a JSON object in this format:
 {instruction}
-Do not include explanations, code, or markdown.
 """)
-# Streamlit app UI
-st.title("📄 Resume Screening Application")
-uploaded_zip = st.file_uploader("Upload a ZIP file containing resumes", type="zip")
 if uploaded_zip:
-    with zipfile.ZipFile(uploaded_zip, "r") as zip_ref:
-        zip_ref.extractall("Extracted_Resumes")
-    st.success("✅ Resumes extracted successfully!")
-    resumes = list(Path("Extracted_Resumes").glob("*.pdf"))
-    valid_resumes = []
-    # Check for 2-page resumes
-    for pdf in resumes:
         try:
-            loader = UnstructuredPDFLoader(str(pdf), mode="paged")
-            pages = loader.load()
-            if len(pages) == 2:
-                valid_resumes.append((pdf.name, pages))
         except Exception as e:
-            st.warning(f"❌ Could not process {pdf.name}: {e}")
-    if valid_resumes:
-        st.subheader("🔎 Select Required Skills")
-        # Extract skills from all resumes
-        all_resume_skills = set()
-        parsed_resumes = {}
-        for filename, pages in valid_resumes:
-            final_data = [page for page in pages]
-            fp = pt.format(input=final_data, instruction=parser.get_format_instructions())
-            result = model.invoke(fp)
-            parsed_resume = parser.parse(result.content)
-            parsed_resumes[filename] = parsed_resume
-            all_resume_skills.update(map(str.lower, parsed_resume.Skills))
-        selected_skills = st.multiselect("Choose required skills:", sorted(all_resume_skills))
-        # Match resumes based on selected skills
-        for filename, parsed_resume in parsed_resumes.items():
-            resume_skills = parsed_resume.Skills
-            found_skills = [
-                skill for skill in selected_skills
-                if any(skill.lower() in rs.lower() for rs in resume_skills)
-            ]
-            if set(found_skills) == set(selected_skills):
-                src_path = Path("Extracted_Resumes") / filename
-                dest_path = Path("Selected_Resumes") / filename
-                with open(src_path, "rb") as src, open(dest_path, "wb") as dst:
-                    dst.write(src.read())
-                st.success(f"✅ {filename} matches and saved to 'Selected_Resumes'")
-            else:
-                st.info(f"ℹ️ {filename} does not match all selected skills.")
-    else:
-        st.warning("⚠️ No 2-page resumes found.")

 import os
+import shutil
 import zipfile
 import streamlit as st
 from langchain_community.document_loaders import UnstructuredPDFLoader
+from langchain.output_parsers import PydanticOutputParser
 from langchain.prompts import PromptTemplate
+from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from pydantic import BaseModel, Field
# --- Working directories ---
# EXTRACTED_DIR receives the contents of the uploaded ZIP archive;
# SELECTED_DIR receives copies of the resumes that pass the skill filter.
EXTRACTED_DIR = "extracted"
SELECTED_DIR = "selected"
# Create both folders up front; exist_ok makes this safe on every script run.
for _workdir in (EXTRACTED_DIR, SELECTED_DIR):
    os.makedirs(_workdir, exist_ok=True)
# --- Model setup ---
# Hosted Llama 3.1 8B Instruct endpoint, requested through the "nebius"
# provider with a 512-token generation cap and temperature 0.7.
# NOTE(review): no API token is passed explicitly; presumably it is read from
# the environment — confirm before deploying.
_endpoint_options = dict(
    repo_id="meta-llama/Llama-3.1-8B-Instruct",
    provider="nebius",
    temperature=0.7,
    max_new_tokens=512,
    task="conversational",
)
llama_model = HuggingFaceEndpoint(**_endpoint_options)

# Chat-style wrapper around the raw endpoint.
model = ChatHuggingFace(llm=llama_model)
# --- Output schema ---
# Structured record the model is asked to produce for each resume. Every field
# is required (none has a default), so a reply that omits one fails validation.
# This is documented with comments rather than a class docstring on purpose:
# pydantic copies a model's docstring into its JSON schema, which would alter
# the format instructions derived from this class.
class JobDesc(BaseModel):
    Objective: str          # career objective / summary
    Name: str               # candidate name
    Age: int                # candidate age, as a whole number
    Qualification: str      # qualification, as free text
    Skills: list[str]       # one entry per skill
    Experience: float       # work experience as a number (unit not specified here)


# Turns the model's raw text reply into a validated JobDesc instance and
# supplies the matching format instructions for the prompt.
parser = PydanticOutputParser(pydantic_object=JobDesc)
# Prompt sent to the model for each resume. It has two placeholders:
#   {input}       - the resume text
#   {instruction} - the format instructions describing the expected JSON
_RESUME_PROMPT_TEXT = """
Extract the following fields from the resume below and return them ONLY as a valid JSON object:
Resume Text:
{input}
Return format:
{instruction}
Only return the JSON. No explanation, no markdown.
"""
prompt_template = PromptTemplate(template=_RESUME_PROMPT_TEXT)
# --- Streamlit UI ---
# Streamlit re-executes this whole script on every widget interaction. The
# expensive work (archive extraction, PDF loading, one LLM call per resume) is
# therefore done once per uploaded archive, and its results and status
# messages are kept in st.session_state and replayed on later reruns.


def _report(log, level, message):
    """Show *message* via ``st.<level>`` now and record it for replay on reruns."""
    getattr(st, level)(message)
    log.append((level, message))


st.title("Resume Screening Application")
uploaded_zip = st.file_uploader("Upload a ZIP file of resumes (PDFs only):", type="zip")

if uploaded_zip:
    # Name + size identifies the upload well enough to detect a new archive.
    upload_key = (uploaded_zip.name, uploaded_zip.size)

    if st.session_state.get("resume_upload_key") != upload_key:
        status_log = []      # (st method name, message) pairs, in display order
        parsed_resumes = []  # (path on disk, display name, JobDesc) triples

        try:
            with zipfile.ZipFile(uploaded_zip, "r") as zip_ref:
                # Take the PDF list from the archive itself, not from the
                # folder: this ignores files left over from earlier uploads,
                # finds PDFs nested in sub-folders, and accepts ".PDF".
                # macOS resource-fork entries under __MACOSX/ are not real PDFs.
                pdf_members = [
                    info
                    for info in zip_ref.infolist()
                    if not info.is_dir()
                    and info.filename.lower().endswith(".pdf")
                    and "__MACOSX" not in info.filename.split("/")
                ]
                # extract() returns the sanitized path it actually wrote.
                pdf_paths = [zip_ref.extract(info, EXTRACTED_DIR) for info in pdf_members]
        except zipfile.BadZipFile:
            _report(status_log, "error", "❌ The uploaded file is not a valid ZIP archive")
            pdf_paths = []
        else:
            _report(status_log, "success", "Resumes extracted!")

        # Identical for every resume, so build it once.
        format_instructions = parser.get_format_instructions()

        for path in pdf_paths:
            resume_file = os.path.basename(path)
            try:
                # Loading sits inside the try so that one unreadable PDF is
                # reported and skipped instead of aborting the whole run.
                pages = UnstructuredPDFLoader(path, mode="paged").load()
                if len(pages) > 2:
                    _report(status_log, "warning", f"❌ {resume_file} rejected (more than 2 pages)")
                    continue
                # Convert pages to a single string for model input.
                resume_text = "\n".join(p.page_content for p in pages)
                prompt = prompt_template.format(
                    input=resume_text,
                    instruction=format_instructions,
                )
                result = model.invoke(prompt)
                parsed = parser.parse(result.content)
            except Exception as e:  # per-file boundary: report and move on
                _report(status_log, "error", f"❌ Failed to parse {resume_file}: {e}")
            else:
                parsed_resumes.append((path, resume_file, parsed))
                _report(status_log, "success", f"✅ {resume_file} parsed successfully")

        st.session_state["resume_upload_key"] = upload_key
        st.session_state["resume_status_log"] = status_log
        st.session_state["parsed_resumes"] = parsed_resumes
    else:
        # Same archive as the previous run: replay the messages, skip the work.
        for level, message in st.session_state["resume_status_log"]:
            getattr(st, level)(message)

    parsed_resumes = st.session_state["parsed_resumes"]

    if parsed_resumes:
        st.subheader("Select Required Skills")
        # Skills are compared case-insensitively, so "Python" and "python"
        # from different resumes collapse into a single option.
        all_skills = sorted(
            {
                skill.strip().lower()
                for _, _, parsed in parsed_resumes
                for skill in parsed.Skills
            }
        )
        selected_skills = st.multiselect("Choose required skills:", all_skills)

        if st.button("Evaluate Resumes"):
            required = set(selected_skills)
            for path, resume_file, parsed in parsed_resumes:
                resume_skills = {skill.strip().lower() for skill in parsed.Skills}
                # A resume is selected only if it has every required skill
                # (with nothing selected, every resume qualifies).
                if required <= resume_skills:
                    shutil.copy(path, SELECTED_DIR)
                    st.success(f"🎉 {resume_file} selected and saved in '{SELECTED_DIR}' folder")
                else:
                    st.info(f"🔍 {resume_file} does not match all selected skills")