jobsai / app.py
forestav's picture
update
f51752f
raw
history blame
10 kB
import gradio as gr
import PyPDF2
import docx2txt
from typing import Optional, List, Dict
import re
from pinecone_handler import PineconeHandler
import hopsworks
import pandas as pd
import os
from dotenv import load_dotenv
load_dotenv()
class Database:
def __init__(self):
# Initialize Hopsworks
project = "orestavf"
api_key = os.getenv("HOPSWORKS_API_KEY")
self.project = hopsworks.login(project=project, api_key_value=api_key)
self.fs = self.project.get_feature_store()
self.feedback_fg = self.fs.get_or_create_feature_group(
name="job_feedback",
version=1,
primary_key=["job_id"],
description="Feature group for storing user feedback on job matches.",
online_enabled=True
)
def save_feedback(self, job_id: str, resume_text: str, headline: str,
occupation: str, description: str, is_relevant: bool):
# Prepare feedback data as a pandas DataFrame
feedback_data = pd.DataFrame([{
"job_id": job_id,
"resume_text": resume_text,
"job_headline": headline,
"job_occupation": occupation,
"job_description": description,
"is_relevant": is_relevant,
#"timestamp": datetime.now()
}])
self.feedback_fg.insert(feedback_data)
print(f"Feedback saved to Hopsworks for job ID: {job_id}")
def extract_text(file) -> Optional[str]:
"""Extract text from uploaded resume file"""
if not file:
return None
try:
file_type = file.name.split('.')[-1].lower()
if file_type == 'pdf':
pdf_reader = PyPDF2.PdfReader(file)
return "\n".join(page.extract_text() for page in pdf_reader.pages)
elif file_type in ['docx', 'doc']:
return docx2txt.process(file)
elif file_type == 'txt':
return str(file.read(), "utf-8")
else:
return f"Unsupported file format: {file_type}"
except Exception as e:
return f"Error processing file: {str(e)}"
class JobMatcher:
def __init__(self):
self.handler = PineconeHandler()
self.db = Database()
self.current_results = []
self.current_resume_text = None
def search_jobs(self, file, num_results: int, city: str = "") -> List[Dict]:
"""Search for matching jobs and return results"""
if not file:
return [{"error": "Please upload a resume file."}]
try:
resume_text = extract_text(file)
if not resume_text:
return [{"error": "Could not extract text from resume."}]
self.current_resume_text = resume_text
resume_text = re.sub(r'\s+', ' ', resume_text).strip()
# Get results from Pinecone
results = self.handler.search_similar_ads(resume_text, top_k=num_results, city=city.strip())
if not results:
return [{"error": "No matching jobs found. Try adjusting your search criteria."}]
# Store results with their Pinecone IDs
self.current_results = [
{
'id': result.id, # Use Pinecone's ID
'score': result.score,
'metadata': result.metadata
}
for result in results
]
return self.current_results
except Exception as e:
return [{"error": f"Error: {str(e)}"}]
def submit_feedback(self, pinecone_id: str, is_relevant: bool) -> str:
"""Submit feedback for a specific job using Pinecone ID"""
try:
# Find the job in current results by Pinecone ID
job = next((job for job in self.current_results if job['id'] == pinecone_id), None)
if not job:
return "Error: Job not found"
metadata = job['metadata']
self.db.save_feedback(
job_id=pinecone_id, # Use Pinecone's ID
resume_text=self.current_resume_text,
headline=metadata['headline'],
occupation=metadata['occupation'],
description=metadata['description'],
is_relevant=is_relevant
)
return f"\u2713 Feedback saved for '{metadata['headline']}'"
except Exception as e:
return f"Error saving feedback: {str(e)}"
def create_interface():
matcher = JobMatcher()
with gr.Blocks() as interface:
gr.Markdown("# AI-Powered Job Search")
with gr.Row():
file_input = gr.File(label="Upload Resume (PDF, DOCX, or TXT)")
num_results = gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Number of Results")
city_input = gr.Textbox(label="Filter by City (Optional)")
search_btn = gr.Button("Search Jobs")
status = gr.Textbox(label="Status", interactive=False)
# Container for job results and feedback buttons
job_containers = []
for i in range(20): # Support up to 20 results
with gr.Column(visible=False) as container:
job_content = gr.Markdown("", elem_id=f"job_content_{i}")
with gr.Row():
relevant_btn = gr.Button("πŸ‘ Relevant", elem_id=f"relevant_{i}")
not_relevant_btn = gr.Button("πŸ‘Ž Not Relevant", elem_id=f"not_relevant_{i}")
feedback_status = gr.Markdown("")
job_containers.append({
'container': container,
'content': job_content,
'feedback_status': feedback_status,
'pinecone_id': None # Will store Pinecone ID for each job
})
def update_job_displays(file, num_results, city):
results = matcher.search_jobs(file, num_results, city)
# Initialize updates list with default values for all containers
updates = []
if "error" in results[0]:
# If there's an error, hide all containers and show error message
for _ in range(20):
updates.extend([
gr.update(visible=False), # Container visibility
"", # Job content
"" # Feedback status
])
updates.append(results[0]["error"]) # Status message
return updates
# Process results and generate updates
for i in range(20):
if i < len(results):
job = results[i]
metadata = job['metadata']
# Store Pinecone ID for this container
job_containers[i]['pinecone_id'] = job['id']
content = f"""
### {metadata['headline']}
**Match Score:** {job['score']:.2f}
**Location:** {metadata['city']}
**Occupation:** {metadata['occupation']}
**Published:** {metadata['published']}
{metadata['description'][:500]}...
**Contact:** {metadata.get('email', 'Not provided')}
**More Info:** {metadata.get('webpage_url', 'Not available')}
*Job ID: {job['id']}*
"""
updates.extend([
gr.update(visible=True), # Container visibility
content, # Job content
"" # Reset feedback status
])
else:
# For unused containers, hide them and clear content
updates.extend([
gr.update(visible=False), # Container visibility
"", # Job content
"" # Reset feedback status
])
# Add final status message
updates.append("Jobs found! Rate them as relevant or not relevant.")
return updates
def handle_feedback(container_index: int, is_relevant: bool):
pinecone_id = job_containers[container_index]['pinecone_id']
if pinecone_id:
response = matcher.submit_feedback(pinecone_id, is_relevant)
return response
return "Error: Job ID not found"
# Connect search button
all_outputs = []
for container in job_containers:
all_outputs.extend([
container['container'],
container['content'],
container['feedback_status']
])
all_outputs.append(status)
search_btn.click(
fn=update_job_displays,
inputs=[file_input, num_results, city_input],
outputs=all_outputs
)
# Connect feedback buttons for each container
for i, container in enumerate(job_containers):
container_obj = container['container']
feedback_status = container['feedback_status']
# Get the buttons from the container
relevant_btn = container_obj.children[1].children[0]
not_relevant_btn = container_obj.children[1].children[1]
relevant_btn.click(
fn=lambda idx=i: handle_feedback(idx, True),
outputs=[feedback_status]
)
not_relevant_btn.click(
fn=lambda idx=i: handle_feedback(idx, False),
outputs=[feedback_status]
)
return interface
if __name__ == "__main__":
interface = create_interface()
interface.launch(debug=True)