Update Home.py
Home.py CHANGED
@@ -1,4 +1,4 @@
-import os
+import os
 import shutil
 import zipfile
 import streamlit as st
@@ -64,10 +64,8 @@ parser = PydanticOutputParser(pydantic_object=JobDesc)
 
 prompt = PromptTemplate(template="""
 Extract the following fields from the resume text and return them as a JSON object.
-
 Resume:
 {input}
-
 Return JSON matching this format:
 {instruction}
 """)
@@ -80,29 +78,33 @@ parsed_resumes = []
 unique_skills = set()
 
 if uploaded_file:
-    #
+    # --- Clean extracted folder ---
     if os.path.exists(EXTRACTED_FOLDER):
         shutil.rmtree(EXTRACTED_FOLDER)
     os.makedirs(EXTRACTED_FOLDER, exist_ok=True)
 
-    # Save uploaded ZIP
+    # --- Save uploaded ZIP ---
     zip_path = os.path.join(BASE_DIR, "temp.zip")
     with open(zip_path, "wb") as f:
         f.write(uploaded_file.read())
 
-    # Extract ZIP
-
-
-
-
+    # --- Extract ZIP ---
+    try:
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(EXTRACTED_FOLDER)
+        st.success(f"✅ Extracted ZIP to: {EXTRACTED_FOLDER}")
+    except Exception as e:
+        st.error(f"❌ Failed to extract ZIP: {e}")
+
+    # --- Clean up zip ---
     os.remove(zip_path)
 
-    #
+    # --- List extracted files ---
     extracted_files = os.listdir(EXTRACTED_FOLDER)
-    st.write(f"🗂️ Files extracted")
+    st.write(f"🗂️ Files extracted:")
     st.write(extracted_files)
 
-    # Process
+    # --- Process PDFs ---
     for filename in extracted_files:
         if filename.lower().endswith(".pdf"):
             file_path = os.path.join(EXTRACTED_FOLDER, filename)
@@ -111,7 +113,6 @@ if uploaded_file:
             loader = UnstructuredPDFLoader(file_path, mode="paged")
             data = loader.load()
 
-            # Reject if more than 2 pages
             if len(data) > 2:
                 st.warning(f"❌ Rejected {filename}: More than 2 pages")
                 continue
@@ -123,42 +124,52 @@ if uploaded_file:
                 instruction=parser.get_format_instructions()
             )
 
-            # Call your LLM properly
             result = llm.invoke(formatted_prompt)
             parsed = parser.parse(result.content)
 
             resume_data = parsed.dict()
-            resume_data["file_path"] = file_path
+            resume_data["file_path"] = file_path  # save path to extracted PDF
             parsed_resumes.append(resume_data)
 
             for skill in parsed.Skills:
                 unique_skills.add(skill.strip())
 
             st.success(f"✅ Parsed: {parsed.Name}")
+            st.write(f"🗂️ Resume path saved: {file_path}")
 
         except Exception as e:
             st.error(f"❌ Failed to parse {filename}: {e}")
+
+# --- Skill categories ---
 skill_categories = {
-
-
-
-
-
-
-
+    "Programming Languages": ["Python"],
+    "Data Analysis & Visualisation": ["Pandas", "Numpy", "Excel", "Matplotlib", "Seaborn"],
+    "Database Management": ["SQL", "Power BI"],
+    "Deep Learning": ["ANN", "CNN", "RNN"],
+    "Machine Learning": ["Scikit-learn", "OpenCV", "NLP", "Supervised learning", "Optuna", "Descriptive Statistics"],
+    "Generative AI": ["Langchain", "LLMs"]
+}
 
+# --- Skill Selection ---
if parsed_resumes:
     selected_categories = st.multiselect("Select required skill categories", list(skill_categories.keys()))
-
+
     if st.button("Evaluate Resumes"):
         required_keywords = set()
         for category in selected_categories:
             required_keywords.update(skill_categories[category])
-
+
         for resume in parsed_resumes:
-            # Match: If any required keyword is in the resume skills
             if any(req_skill.lower() in (skill.lower() for skill in resume["Skills"]) for req_skill in required_keywords):
                 st.success(f"✅ Selected: {resume['Name']}")
-
+
+                source_path = resume["file_path"]
+                dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
+
+                if os.path.exists(source_path):
+                    shutil.copy(source_path, dest_path)
+                    st.info(f"📁 File copied to selected: {os.path.basename(source_path)}")
+                else:
+                    st.error(f"❌ Could not find file to copy: {source_path}")
             else:
                 st.warning(f"❌ Rejected: {resume['Name']}")
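A note on the extraction guard added here: for a corrupt or non-ZIP upload, zipfile raises zipfile.BadZipFile, so the broad except Exception could be narrowed. A minimal sketch of the same step with stand-in paths (the real code uses zip_path under BASE_DIR and EXTRACTED_FOLDER):

# Sketch of the guarded extraction; "upload.zip" and "extracted/" are stand-ins.
import os
import zipfile

zip_path = "upload.zip"
out_dir = "extracted"
os.makedirs(out_dir, exist_ok=True)

try:
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(out_dir)  # note: trusts member paths inside the archive
except (zipfile.BadZipFile, OSError) as e:  # corrupt upload or unreadable file
    print(f"Failed to extract ZIP: {e}")
finally:
    if os.path.exists(zip_path):
        os.remove(zip_path)  # remove the temp archive even when extraction fails

Since extractall writes whatever paths the archive declares, it is worth validating member names against path traversal before extracting untrusted uploads.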
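The two-page gate relies on mode="paged" making UnstructuredPDFLoader return one Document per page, so len(data) is the page count. A small sketch, assuming the langchain_community and unstructured packages and a local resume.pdf (a stand-in name):

# With mode="paged", load() yields one Document per PDF page.
from langchain_community.document_loaders import UnstructuredPDFLoader

loader = UnstructuredPDFLoader("resume.pdf", mode="paged")  # "resume.pdf" is a stand-in
data = loader.load()
print(f"{len(data)} pages")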
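For reference, the prompt-to-parser round trip inside the loop looks roughly like this in isolation. The real JobDesc model is defined earlier in Home.py and is not part of this diff; the stand-in below carries only the two fields the loop visibly uses (Name, Skills), and the llm.invoke call is stubbed out:

# Self-contained sketch of the prompt -> llm.invoke -> parser.parse flow.
from typing import List

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel

class JobDesc(BaseModel):  # stand-in; the real model lives earlier in Home.py
    Name: str
    Skills: List[str]

parser = PydanticOutputParser(pydantic_object=JobDesc)

prompt = PromptTemplate(template="""
Extract the following fields from the resume text and return them as a JSON object.
Resume:
{input}
Return JSON matching this format:
{instruction}
""")

formatted_prompt = prompt.format(
    input="Jane Doe. Skills: Python, SQL.",
    instruction=parser.get_format_instructions(),
)
# result = llm.invoke(formatted_prompt)  # llm is configured elsewhere in Home.py
# parsed = parser.parse(result.content)  # raises if the reply isn't valid JSON
parsed = parser.parse('{"Name": "Jane Doe", "Skills": ["Python", "SQL"]}')
print(parsed.Name, parsed.Skills)

parser.parse raising on a malformed reply is what the surrounding except Exception turns into the "Failed to parse" error message.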
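The selection test above uses `in` against a generator expression, which works and short-circuits, but it demands an exact (case-insensitive) match, so a resume listing "LLM" will not match the keyword "LLMs". An equivalent and arguably clearer form is a set intersection, shown here on illustrative data (it also strips stray whitespace, which the original comparison does not):

# Equivalent selection test as a set intersection; the data is illustrative.
required_keywords = {"Python", "SQL"}
resume = {"Name": "Jane Doe", "Skills": ["python ", "Pandas"]}

resume_skills = {skill.strip().lower() for skill in resume["Skills"]}
matched = resume_skills & {kw.lower() for kw in required_keywords}

if matched:
    print(f"Selected: {resume['Name']} (matched on: {sorted(matched)})")
else:
    print(f"Rejected: {resume['Name']}")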
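One caveat on the new copy-to-selected step: shutil.copy fails if the destination directory does not exist, and this diff does not show where SELECTED_FOLDER is created. A defensive sketch with stand-in paths:

# Guarded version of the copy step; both paths are stand-ins.
import os
import shutil

SELECTED_FOLDER = "selected"          # defined earlier in the real Home.py
source_path = "extracted/resume.pdf"  # illustrative

os.makedirs(SELECTED_FOLDER, exist_ok=True)  # no-op if the folder already exists
if os.path.exists(source_path):
    dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
    shutil.copy(source_path, dest_path)
    print(f"Copied to: {dest_path}")
else:
    print(f"Missing source file: {source_path}")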