Update Home.py
Home.py CHANGED
@@ -1,4 +1,4 @@
-import os
+import os
 import shutil
 import zipfile
 import streamlit as st
@@ -64,10 +64,8 @@ parser = PydanticOutputParser(pydantic_object=JobDesc)
 
 prompt = PromptTemplate(template="""
 Extract the following fields from the resume text and return them as a JSON object.
-
 Resume:
 {input}
-
 Return JSON matching this format:
 {instruction}
 """)
@@ -80,29 +78,33 @@ parsed_resumes = []
 unique_skills = set()
 
 if uploaded_file:
-    #
+    # --- Clean extracted folder ---
     if os.path.exists(EXTRACTED_FOLDER):
         shutil.rmtree(EXTRACTED_FOLDER)
     os.makedirs(EXTRACTED_FOLDER, exist_ok=True)
 
-    # Save uploaded ZIP
+    # --- Save uploaded ZIP ---
     zip_path = os.path.join(BASE_DIR, "temp.zip")
     with open(zip_path, "wb") as f:
         f.write(uploaded_file.read())
 
-    # Extract ZIP
-
-
-
-
+    # --- Extract ZIP ---
+    try:
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(EXTRACTED_FOLDER)
+        st.success(f"✅ Extracted ZIP to: {EXTRACTED_FOLDER}")
+    except Exception as e:
+        st.error(f"❌ Failed to extract ZIP: {e}")
+
+    # --- Clean up zip ---
     os.remove(zip_path)
 
-    #
+    # --- List extracted files ---
     extracted_files = os.listdir(EXTRACTED_FOLDER)
-    st.write(f"🗂️ Files extracted")
+    st.write(f"🗂️ Files extracted:")
     st.write(extracted_files)
 
-    # Process
+    # --- Process PDFs ---
     for filename in extracted_files:
         if filename.lower().endswith(".pdf"):
             file_path = os.path.join(EXTRACTED_FOLDER, filename)
@@ -111,7 +113,6 @@ if uploaded_file:
             loader = UnstructuredPDFLoader(file_path, mode="paged")
             data = loader.load()
 
-            # Reject if more than 2 pages
             if len(data) > 2:
                 st.warning(f"❌ Rejected {filename}: More than 2 pages")
                 continue
@@ -123,42 +124,52 @@ if uploaded_file:
                 instruction=parser.get_format_instructions()
             )
 
-            # Call your LLM properly
             result = llm.invoke(formatted_prompt)
             parsed = parser.parse(result.content)
 
             resume_data = parsed.dict()
-            resume_data["file_path"] = file_path
+            resume_data["file_path"] = file_path  # save path to extracted PDF
             parsed_resumes.append(resume_data)
 
             for skill in parsed.Skills:
                 unique_skills.add(skill.strip())
 
             st.success(f"✅ Parsed: {parsed.Name}")
+            st.write(f"🗂️ Resume path saved: {file_path}")
 
         except Exception as e:
             st.error(f"❌ Failed to parse {filename}: {e}")
+
+# --- Skill categories ---
 skill_categories = {
-
-
-
-
-
-
-
+    "Programming Languages": ["Python"],
+    "Data Analysis & Visualisation": ["Pandas", "Numpy", "Excel", "Matplotlib", "Seaborn"],
+    "Database Management": ["SQL", "Power BI"],
+    "Deep Learning": ["ANN", "CNN", "RNN"],
+    "Machine Learning": ["Scikit-learn", "OpenCV", "NLP", "Supervised learning", "Optuna", "Descriptive Statistics"],
+    "Generative AI": ["Langchain", "LLMs"]
+}
 
+# --- Skill Selection ---
if parsed_resumes:
     selected_categories = st.multiselect("Select required skill categories", list(skill_categories.keys()))
-
+
     if st.button("Evaluate Resumes"):
         required_keywords = set()
         for category in selected_categories:
             required_keywords.update(skill_categories[category])
-
+
         for resume in parsed_resumes:
-            # Match: If any required keyword is in the resume skills
             if any(req_skill.lower() in (skill.lower() for skill in resume["Skills"]) for req_skill in required_keywords):
                 st.success(f"✅ Selected: {resume['Name']}")
-
+
+                source_path = resume["file_path"]
+                dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
+
+                if os.path.exists(source_path):
+                    shutil.copy(source_path, dest_path)
+                    st.info(f"📁 File copied to selected: {os.path.basename(source_path)}")
+                else:
+                    st.error(f"❌ Could not find file to copy: {source_path}")
             else:
                 st.warning(f"❌ Rejected: {resume['Name']}")
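A note on the extraction guard added here: for a corrupt or non-ZIP upload, zipfile raises zipfile.BadZipFile, so the broad except Exception could be narrowed. A minimal sketch of the same step with stand-in paths (the real code uses zip_path under BASE_DIR and EXTRACTED_FOLDER):

# Sketch of the guarded extraction; "upload.zip" and "extracted/" are stand-ins.
import os
import zipfile

zip_path = "upload.zip"
out_dir = "extracted"
os.makedirs(out_dir, exist_ok=True)

try:
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(out_dir)  # note: trusts member paths inside the archive
except (zipfile.BadZipFile, OSError) as e:  # corrupt upload or unreadable file
    print(f"Failed to extract ZIP: {e}")
finally:
    if os.path.exists(zip_path):
        os.remove(zip_path)  # remove the temp archive even when extraction fails

Since extractall writes whatever paths the archive declares, it is worth validating member names against path traversal before extracting untrusted uploads.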
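The two-page gate relies on mode="paged" making UnstructuredPDFLoader return one Document per page, so len(data) is the page count. A small sketch, assuming the langchain_community and unstructured packages and a local resume.pdf (a stand-in name):

# With mode="paged", load() yields one Document per PDF page.
from langchain_community.document_loaders import UnstructuredPDFLoader

loader = UnstructuredPDFLoader("resume.pdf", mode="paged")  # "resume.pdf" is a stand-in
data = loader.load()
print(f"{len(data)} pages")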
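For reference, the prompt-to-parser round trip inside the loop looks roughly like this in isolation. The real JobDesc model is defined earlier in Home.py and is not part of this diff; the stand-in below carries only the two fields the loop visibly uses (Name, Skills), and the llm.invoke call is stubbed out:

# Self-contained sketch of the prompt -> llm.invoke -> parser.parse flow.
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel

class JobDesc(BaseModel):  # stand-in; the real model lives earlier in Home.py
    Name: str
    Skills: List[str]

parser = PydanticOutputParser(pydantic_object=JobDesc)

prompt = PromptTemplate(template="""
Extract the following fields from the resume text and return them as a JSON object.
Resume:
{input}
Return JSON matching this format:
{instruction}
""")

formatted_prompt = prompt.format(
    input="Jane Doe. Skills: Python, SQL.",
    instruction=parser.get_format_instructions(),
)
# result = llm.invoke(formatted_prompt)  # llm is configured elsewhere in Home.py
# parsed = parser.parse(result.content)  # raises if the reply isn't valid JSON
parsed = parser.parse('{"Name": "Jane Doe", "Skills": ["Python", "SQL"]}')
print(parsed.Name, parsed.Skills)

parser.parse raising on a malformed reply is what the surrounding except Exception turns into the "Failed to parse" error message.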
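The selection test above uses `in` against a generator expression, which works and short-circuits, but it demands an exact (case-insensitive) match, so a resume listing "LLM" will not match the keyword "LLMs". An equivalent and arguably clearer form is a set intersection, shown here on illustrative data (it also strips stray whitespace, which the original comparison does not):

# Equivalent selection test as a set intersection; the data is illustrative.
required_keywords = {"Python", "SQL"}
resume = {"Name": "Jane Doe", "Skills": ["python ", "Pandas"]}

resume_skills = {skill.strip().lower() for skill in resume["Skills"]}
matched = resume_skills & {kw.lower() for kw in required_keywords}

if matched:
    print(f"Selected: {resume['Name']} (matched on: {sorted(matched)})")
else:
    print(f"Rejected: {resume['Name']}")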
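One caveat on the new copy-to-selected step: shutil.copy fails if the destination directory does not exist, and this diff does not show where SELECTED_FOLDER is created. A defensive sketch with stand-in paths:

# Guarded version of the copy step; both paths are stand-ins.
import os
import shutil

SELECTED_FOLDER = "selected"          # defined earlier in the real Home.py
source_path = "extracted/resume.pdf"  # illustrative

os.makedirs(SELECTED_FOLDER, exist_ok=True)  # no-op if the folder already exists
if os.path.exists(source_path):
    dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
    shutil.copy(source_path, dest_path)
    print(f"Copied to: {dest_path}")
else:
    print(f"Missing source file: {source_path}")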