ChaitanyaSubhakar committed on
Commit
efab8b2
·
verified ·
1 Parent(s): 08ef3c9

Update Home.py

Browse files
Files changed (1) hide show
  1. Home.py +38 -27
Home.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  import shutil
3
  import zipfile
4
  import streamlit as st
@@ -64,10 +64,8 @@ parser = PydanticOutputParser(pydantic_object=JobDesc)
64
 
65
  prompt = PromptTemplate(template="""
66
  Extract the following fields from the resume text and return them as a JSON object.
67
-
68
  Resume:
69
  {input}
70
-
71
  Return JSON matching this format:
72
  {instruction}
73
  """)
@@ -80,29 +78,33 @@ parsed_resumes = []
80
  unique_skills = set()
81
 
82
  if uploaded_file:
83
- # Clear Extracted folder before extracting new files
84
  if os.path.exists(EXTRACTED_FOLDER):
85
  shutil.rmtree(EXTRACTED_FOLDER)
86
  os.makedirs(EXTRACTED_FOLDER, exist_ok=True)
87
 
88
- # Save uploaded ZIP temporarily
89
  zip_path = os.path.join(BASE_DIR, "temp.zip")
90
  with open(zip_path, "wb") as f:
91
  f.write(uploaded_file.read())
92
 
93
- # Extract ZIP
94
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
95
- zip_ref.extractall(EXTRACTED_FOLDER)
96
-
97
- # Remove temp zip to keep clean
 
 
 
 
98
  os.remove(zip_path)
99
 
100
- # Show files in extracted folder
101
  extracted_files = os.listdir(EXTRACTED_FOLDER)
102
- st.write(f"🗂️ Files extracted in folder `{EXTRACTED_FOLDER}`:")
103
  st.write(extracted_files)
104
 
105
- # Process each PDF resume in Extracted folder
106
  for filename in extracted_files:
107
  if filename.lower().endswith(".pdf"):
108
  file_path = os.path.join(EXTRACTED_FOLDER, filename)
@@ -111,7 +113,6 @@ if uploaded_file:
111
  loader = UnstructuredPDFLoader(file_path, mode="paged")
112
  data = loader.load()
113
 
114
- # Reject if more than 2 pages
115
  if len(data) > 2:
116
  st.warning(f"❌ Rejected {filename}: More than 2 pages")
117
  continue
@@ -123,42 +124,52 @@ if uploaded_file:
123
  instruction=parser.get_format_instructions()
124
  )
125
 
126
- # Call your LLM properly
127
  result = llm.invoke(formatted_prompt)
128
  parsed = parser.parse(result.content)
129
 
130
  resume_data = parsed.dict()
131
- resume_data["file_path"] = file_path
132
  parsed_resumes.append(resume_data)
133
 
134
  for skill in parsed.Skills:
135
  unique_skills.add(skill.strip())
136
 
137
  st.success(f"✅ Parsed: {parsed.Name}")
 
138
 
139
  except Exception as e:
140
  st.error(f"❌ Failed to parse {filename}: {e}")
 
 
141
  skill_categories = {
142
- "Programming Languages" : ["Python"],
143
- "Data Analysis & Visualisation": ["Pandas", "Numpy", "Excel", "Matplotlib", "Seaborn"],
144
- "Database Management": ["SQL", "Power BI"],
145
- "Deep Learning": ["ANN", "CNN", "RNN"],
146
- "Machine Learning": ["Scikit-learn", "OpenCV", "NLP", "Supervised learning", "Optuna", "Descriptive Statistics"],
147
- "Generative AI": ["Langchain", "LLMs"]
148
- }
149
 
 
150
  if parsed_resumes:
151
  selected_categories = st.multiselect("Select required skill categories", list(skill_categories.keys()))
152
-
153
  if st.button("Evaluate Resumes"):
154
  required_keywords = set()
155
  for category in selected_categories:
156
  required_keywords.update(skill_categories[category])
157
-
158
  for resume in parsed_resumes:
159
- # Match: If any required keyword is in the resume skills
160
  if any(req_skill.lower() in (skill.lower() for skill in resume["Skills"]) for req_skill in required_keywords):
161
  st.success(f"✅ Selected: {resume['Name']}")
162
- shutil.copy(resume["file_path"], os.path.join(SELECTED_FOLDER, os.path.basename(resume["file_path"])))
 
 
 
 
 
 
 
 
163
  else:
164
  st.warning(f"❌ Rejected: {resume['Name']}")
 
1
+ import os
2
  import shutil
3
  import zipfile
4
  import streamlit as st
 
64
 
65
  prompt = PromptTemplate(template="""
66
  Extract the following fields from the resume text and return them as a JSON object.
 
67
  Resume:
68
  {input}
 
69
  Return JSON matching this format:
70
  {instruction}
71
  """)
 
78
  unique_skills = set()
79
 
80
  if uploaded_file:
81
+ # --- Clean extracted folder ---
82
  if os.path.exists(EXTRACTED_FOLDER):
83
  shutil.rmtree(EXTRACTED_FOLDER)
84
  os.makedirs(EXTRACTED_FOLDER, exist_ok=True)
85
 
86
+ # --- Save uploaded ZIP ---
87
  zip_path = os.path.join(BASE_DIR, "temp.zip")
88
  with open(zip_path, "wb") as f:
89
  f.write(uploaded_file.read())
90
 
91
+ # --- Extract ZIP ---
92
+ try:
93
+ with zipfile.ZipFile(zip_path, 'r') as zip_ref:
94
+ zip_ref.extractall(EXTRACTED_FOLDER)
95
+ st.success(f"✅ Extracted ZIP to: {EXTRACTED_FOLDER}")
96
+ except Exception as e:
97
+ st.error(f"❌ Failed to extract ZIP: {e}")
98
+
99
+ # --- Clean up zip ---
100
  os.remove(zip_path)
101
 
102
+ # --- List extracted files ---
103
  extracted_files = os.listdir(EXTRACTED_FOLDER)
104
+ st.write(f"🗂️ Files extracted:")
105
  st.write(extracted_files)
106
 
107
+ # --- Process PDFs ---
108
  for filename in extracted_files:
109
  if filename.lower().endswith(".pdf"):
110
  file_path = os.path.join(EXTRACTED_FOLDER, filename)
 
113
  loader = UnstructuredPDFLoader(file_path, mode="paged")
114
  data = loader.load()
115
 
 
116
  if len(data) > 2:
117
  st.warning(f"❌ Rejected {filename}: More than 2 pages")
118
  continue
 
124
  instruction=parser.get_format_instructions()
125
  )
126
 
 
127
  result = llm.invoke(formatted_prompt)
128
  parsed = parser.parse(result.content)
129
 
130
  resume_data = parsed.dict()
131
+ resume_data["file_path"] = file_path # save path to extracted PDF
132
  parsed_resumes.append(resume_data)
133
 
134
  for skill in parsed.Skills:
135
  unique_skills.add(skill.strip())
136
 
137
  st.success(f"✅ Parsed: {parsed.Name}")
138
+ st.write(f"🗂️ Resume path saved: {file_path}")
139
 
140
  except Exception as e:
141
  st.error(f"❌ Failed to parse {filename}: {e}")
142
+
143
+ # --- Skill categories ---
144
  skill_categories = {
145
+ "Programming Languages": ["Python"],
146
+ "Data Analysis & Visualisation": ["Pandas", "Numpy", "Excel", "Matplotlib", "Seaborn"],
147
+ "Database Management": ["SQL", "Power BI"],
148
+ "Deep Learning": ["ANN", "CNN", "RNN"],
149
+ "Machine Learning": ["Scikit-learn", "OpenCV", "NLP", "Supervised learning", "Optuna", "Descriptive Statistics"],
150
+ "Generative AI": ["Langchain", "LLMs"]
151
+ }
152
 
153
+ # --- Skill Selection ---
154
  if parsed_resumes:
155
  selected_categories = st.multiselect("Select required skill categories", list(skill_categories.keys()))
156
+
157
  if st.button("Evaluate Resumes"):
158
  required_keywords = set()
159
  for category in selected_categories:
160
  required_keywords.update(skill_categories[category])
161
+
162
  for resume in parsed_resumes:
 
163
  if any(req_skill.lower() in (skill.lower() for skill in resume["Skills"]) for req_skill in required_keywords):
164
  st.success(f"✅ Selected: {resume['Name']}")
165
+
166
+ source_path = resume["file_path"]
167
+ dest_path = os.path.join(SELECTED_FOLDER, os.path.basename(source_path))
168
+
169
+ if os.path.exists(source_path):
170
+ shutil.copy(source_path, dest_path)
171
+ st.info(f"📁 File copied to selected: {os.path.basename(source_path)}")
172
+ else:
173
+ st.error(f"❌ Could not find file to copy: {source_path}")
174
  else:
175
  st.warning(f"❌ Rejected: {resume['Name']}")