ChaitanyaSubhakar committed on
Commit
b00ce35
·
verified ·
1 Parent(s): 3b04f33

Update Home.py

Browse files
Files changed (1) hide show
  1. Home.py +65 -98
Home.py CHANGED
@@ -1,132 +1,99 @@
1
  import os
 
2
  import zipfile
3
- from pathlib import Path
4
  import streamlit as st
5
  from langchain_community.document_loaders import UnstructuredPDFLoader
6
- from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
7
  from langchain.prompts import PromptTemplate
8
- from langchain_core.output_parsers import PydanticOutputParser
9
  from pydantic import BaseModel, Field
10
 
11
- st.markdown(
12
- """
13
- <style>
14
- .stApp {
15
- background-color: midnightblue;
16
- color: white;
17
- }
18
- </style>
19
- """,
20
- unsafe_allow_html=True
21
- )
22
-
23
- # Setup your HF token and model (replace with your token handling)
24
- newhf = st.secrets["HF_TOKEN"]
25
-
26
- # Create folders if not present
27
- Path("Extracted_Resumes").mkdir(exist_ok=True)
28
- Path("Selected_Resumes").mkdir(exist_ok=True)
29
 
30
- # Set up the LLaMA model
31
  llama_model = HuggingFaceEndpoint(
32
  repo_id="meta-llama/Llama-3.1-8B-Instruct",
33
  provider="nebius",
34
  temperature=0.7,
35
- api_key=newhf,
36
  max_new_tokens=512,
37
  task="conversational"
38
  )
 
39
 
40
- model = ChatHuggingFace(
41
- llm=llama_model,
42
- repo_id="meta-llama/Llama-3.1-8B-Instruct",
43
- provider="nebius",
44
- temperature=0.7,
45
- api_key=newhf,
46
- max_new_tokens=512,
47
- task="conversational"
48
- )
49
-
50
- # Pydantic schema for parsing resume content
51
  class JobDesc(BaseModel):
52
- Objective: str = Field(description="Objective")
53
- Name: str = Field(description="Name")
54
- Age: int = Field(description="Age")
55
- Qualification: str = Field(description="Qualification")
56
- Skills: list[str] = Field(description="Skills")
57
- Experience: float = Field(description="Work Experience")
58
 
59
  parser = PydanticOutputParser(pydantic_object=JobDesc)
60
 
61
- # Prompt template for extracting fields from resume
62
- pt = PromptTemplate(template="""
63
- You are an information extraction expert. Extract the following fields from the given resume text and return ONLY the JSON that matches the format.
64
 
65
  Resume Text:
66
  {input}
67
 
68
- Return ONLY a JSON object in this format:
69
  {instruction}
70
 
71
- Do not include explanations, code, or markdown.
72
  """)
73
 
74
- # Streamlit app UI
75
- st.title("πŸ“„ Resume Screening Application")
76
-
77
- uploaded_zip = st.file_uploader("Upload a ZIP file containing resumes", type="zip")
78
 
 
79
  if uploaded_zip:
80
- with zipfile.ZipFile(uploaded_zip, "r") as zip_ref:
81
- zip_ref.extractall("Extracted_Resumes")
82
- st.success("βœ… Resumes extracted successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- resumes = list(Path("Extracted_Resumes").glob("*.pdf"))
85
- valid_resumes = []
 
 
86
 
87
- # Check for 2-page resumes
88
- for pdf in resumes:
89
  try:
90
- loader = UnstructuredPDFLoader(str(pdf), mode="paged")
91
- pages = loader.load()
92
- if len(pages) == 2:
93
- valid_resumes.append((pdf.name, pages))
94
  except Exception as e:
95
- st.warning(f"❌ Could not process {pdf.name}: {e}")
96
-
97
- if valid_resumes:
98
- st.subheader("πŸ”Ž Select Required Skills")
99
-
100
- # Extract skills from all resumes
101
- all_resume_skills = set()
102
-
103
- parsed_resumes = {}
104
-
105
- for filename, pages in valid_resumes:
106
- final_data = [page for page in pages]
107
- fp = pt.format(input=final_data, instruction=parser.get_format_instructions())
108
- result = model.invoke(fp)
109
- parsed_resume = parser.parse(result.content)
110
- parsed_resumes[filename] = parsed_resume
111
- all_resume_skills.update(map(str.lower, parsed_resume.Skills))
112
-
113
- selected_skills = st.multiselect("Choose required skills:", sorted(all_resume_skills))
114
-
115
- # Match resumes based on selected skills
116
- for filename, parsed_resume in parsed_resumes.items():
117
- resume_skills = parsed_resume.Skills
118
- found_skills = [
119
- skill for skill in selected_skills
120
- if any(skill.lower() in rs.lower() for rs in resume_skills)
121
- ]
122
-
123
- if set(found_skills) == set(selected_skills):
124
- src_path = Path("Extracted_Resumes") / filename
125
- dest_path = Path("Selected_Resumes") / filename
126
- with open(src_path, "rb") as src, open(dest_path, "wb") as dst:
127
- dst.write(src.read())
128
- st.success(f"βœ… {filename} matches and saved to 'Selected_Resumes'")
129
- else:
130
- st.info(f"ℹ️ {filename} does not match all selected skills.")
131
- else:
132
- st.warning("⚠️ No 2-page resumes found.")
 
1
  import os
2
+ import shutil
3
  import zipfile
 
4
  import streamlit as st
5
  from langchain_community.document_loaders import UnstructuredPDFLoader
6
+ from langchain.output_parsers import PydanticOutputParser
7
  from langchain.prompts import PromptTemplate
8
+ from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
9
  from pydantic import BaseModel, Field
10
 
11
# --- Setup Directories ---
# Working folders used by the app: the uploaded archive is unpacked into
# EXTRACTED_DIR and matching resumes are copied into SELECTED_DIR.
EXTRACTED_DIR = "extracted"
SELECTED_DIR = "selected"

# Create both folders up front; exist_ok makes repeated runs harmless.
for _folder in (EXTRACTED_DIR, SELECTED_DIR):
    os.makedirs(_folder, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
# --- Model Setup ---
# Keyword arguments handed to HuggingFaceEndpoint, kept in one place so the
# model configuration can be read at a glance.
_endpoint_settings = {
    "repo_id": "meta-llama/Llama-3.1-8B-Instruct",
    "provider": "nebius",
    "temperature": 0.7,
    "max_new_tokens": 512,
    "task": "conversational",
}
llama_model = HuggingFaceEndpoint(**_endpoint_settings)

# Chat wrapper around the endpoint; the rest of the script calls model.invoke().
model = ChatHuggingFace(llm=llama_model)
26
 
27
# --- Output Schema ---
# Structured record the LLM must return for each resume. The JSON schema of
# this model is what parser.get_format_instructions() puts into the prompt, so
# the field descriptions below are the only per-field guidance the LLM gets.
# All fields are required; a reply that omits one fails parser.parse().
class JobDesc(BaseModel):
    Objective: str = Field(
        ..., description="Career objective or professional summary stated in the resume"
    )
    Name: str = Field(..., description="Full name of the candidate")
    Age: int = Field(..., description="Age of the candidate in years, as an integer")
    Qualification: str = Field(
        ..., description="Highest educational qualification of the candidate"
    )
    Skills: list[str] = Field(
        ..., description="List of the candidate's skills, one skill per list item"
    )
    # NOTE(review): the unit is assumed to be years — confirm with the owner.
    Experience: float = Field(
        ..., description="Total work experience in years, as a plain number"
    )


# Renders the schema as format instructions and validates the LLM reply
# against JobDesc.
parser = PydanticOutputParser(pydantic_object=JobDesc)
37
 
38
# Instruction text sent to the model. {input} is filled with the resume text
# and {instruction} with the output parser's format instructions.
_RESUME_PROMPT_TEXT = """
Extract the following fields from the resume below and return them ONLY as a valid JSON object:

Resume Text:
{input}

Return format:
{instruction}

Only return the JSON. No explanation, no markdown.
"""

prompt_template = PromptTemplate(template=_RESUME_PROMPT_TEXT)
49
 
50
# --- Streamlit UI ---
# Streamlit re-executes this script on every widget interaction. Unzipping,
# PDF loading and LLM calls are therefore done once per upload and cached in
# st.session_state under the keys below; otherwise each click would re-query
# the model and could change the skill options under the user.
_UPLOAD_KEY = "resume_upload_key"
_FILES_KEY = "resume_files"
_OUTCOMES_KEY = "resume_outcomes"


def _normalize_skill(skill: str) -> str:
    """Return *skill* stripped and case-folded for case-insensitive comparison."""
    return skill.strip().casefold()


def _extract_resume_names(uploaded_zip) -> list[str]:
    """Unpack the uploaded archive into EXTRACTED_DIR and list its PDFs.

    Only top-level ``.pdf`` members of this archive are returned (extension
    matched case-insensitively), so files left in EXTRACTED_DIR by an earlier
    upload are not screened again.

    Raises:
        zipfile.BadZipFile: if the upload is not a readable ZIP archive.
    """
    with zipfile.ZipFile(uploaded_zip, "r") as zip_ref:
        zip_ref.extractall(EXTRACTED_DIR)
        members = zip_ref.namelist()
    return sorted(
        name for name in members
        if "/" not in name and name.lower().endswith(".pdf")
    )


def _screen_resume(resume_file: str):
    """Load one extracted resume and ask the LLM to extract its fields.

    Returns:
        A ``(level, message, parsed)`` tuple. *level* is "success", "warning"
        or "error" and selects the Streamlit status call used to display
        *message*; *parsed* is the object returned by ``parser.parse`` or
        None when the resume was rejected or could not be processed.
    """
    path = os.path.join(EXTRACTED_DIR, resume_file)
    try:
        # Loading sits inside the try so that one unreadable PDF is reported
        # and skipped instead of aborting the whole run.
        pages = UnstructuredPDFLoader(path, mode="paged").load()
        if len(pages) > 2:
            return "warning", f"❌ {resume_file} rejected (more than 2 pages)", None

        # Convert pages to a single string for model input.
        resume_text = "\n".join(p.page_content for p in pages)
        prompt = prompt_template.format(
            input=resume_text,
            instruction=parser.get_format_instructions(),
        )
        result = model.invoke(prompt)
        parsed = parser.parse(result.content)
    except Exception as e:  # per-file boundary: report the failure and move on
        return "error", f"❌ Failed to parse {resume_file}: {e}", None
    return "success", f"✅ {resume_file} parsed successfully", parsed


def _skill_options(parsed_resumes) -> list[str]:
    """Return the distinct skills across all parsed resumes, sorted for display.

    Skills that differ only by case or surrounding whitespace are merged and
    the first spelling encountered is the one shown.
    """
    options = {}
    for _, parsed in parsed_resumes:
        for skill in parsed.Skills:
            key = _normalize_skill(skill)
            if key:
                options.setdefault(key, skill.strip())
    return sorted(options.values(), key=str.casefold)


st.title("Resume Screening Application")

uploaded_zip = st.file_uploader("Upload a ZIP file of resumes (PDFs only):", type="zip")
if uploaded_zip:
    # Name + size is used to notice that a different archive was uploaded.
    upload_id = (uploaded_zip.name, uploaded_zip.size)
    if st.session_state.get(_UPLOAD_KEY) != upload_id:
        try:
            extracted_names = _extract_resume_names(uploaded_zip)
        except zipfile.BadZipFile as e:
            st.error(f"❌ Could not read the uploaded ZIP file: {e}")
            st.stop()
        st.session_state[_FILES_KEY] = extracted_names
        st.session_state[_OUTCOMES_KEY] = {}
        st.session_state[_UPLOAD_KEY] = upload_id
    st.success("Resumes extracted!")

    notify = {"success": st.success, "warning": st.warning, "error": st.error}
    outcomes = st.session_state[_OUTCOMES_KEY]
    parsed_resumes = []

    for resume_file in st.session_state[_FILES_KEY]:
        # Each resume is sent to the LLM once; later reruns replay the result.
        if resume_file not in outcomes:
            outcomes[resume_file] = _screen_resume(resume_file)
        level, message, parsed = outcomes[resume_file]
        notify[level](message)
        if parsed is not None:
            parsed_resumes.append((resume_file, parsed))

    if parsed_resumes:
        st.subheader("Select Required Skills")
        all_skills = _skill_options(parsed_resumes)
        selected_skills = st.multiselect("Choose required skills:", all_skills)

        if st.button("Evaluate Resumes"):
            required = {_normalize_skill(skill) for skill in selected_skills}
            for resume_file, parsed in parsed_resumes:
                offered = {_normalize_skill(skill) for skill in parsed.Skills}
                # A resume is selected only if it covers every required skill
                # (an empty selection therefore selects every resume).
                if required <= offered:
                    shutil.copy(os.path.join(EXTRACTED_DIR, resume_file), SELECTED_DIR)
                    st.success(f"🎉 {resume_file} selected and saved in '{SELECTED_DIR}' folder")
                else:
                    st.info(f"🔍 {resume_file} does not match all selected skills")