|
|
""" |
|
|
Mock data for HRHUB demo. |
|
|
This file contains hardcoded data for MVP demonstration. |
|
|
|
|
|
TO SWITCH TO REAL DATA: |
|
|
Replace imports in app.py: |
|
|
from data.mock_data import get_candidate_data, get_company_matches |
|
|
↓ |
|
|
from data.data_loader import get_candidate_data, get_company_matches |
|
|
""" |
|
|
|
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from typing import Dict, List, Tuple, Any |
|
|
|
|
|
|
|
|
def get_candidate_data(candidate_id: int = 0) -> Dict[str, Any]:
    """
    Return the hardcoded demo candidate profile.

    The mock data set contains a single profile. A new dictionary is built
    on every call, so callers may mutate the result without affecting later
    calls. The requested ``candidate_id`` is echoed back in the ``'id'`` and
    ``'name'`` fields so the profile agrees with the identifier the caller
    asked for; with the default id of 0 the result is the original demo
    profile.

    Args:
        candidate_id: Candidate identifier (0 for demo)

    Returns:
        Dictionary with candidate information
    """

    candidate = {
        # Identity: reflect the requested id instead of a fixed 0.
        'id': candidate_id,
        'name': f'Demo Candidate #{candidate_id}',

        # Skills
        'skills': [
            'Python', 'Machine Learning', 'Data Science', 'SQL', 'TensorFlow',
            'Pandas', 'NumPy', 'Scikit-learn', 'Deep Learning', 'NLP',
            'Computer Vision', 'AWS', 'Docker', 'Git', 'Agile'
        ],

        # Education (parallel lists, one entry per degree)
        'educational_institution_name': ['Technical University of Denmark'],
        'degree_names': ['Master of Science'],
        'passing_years': ['2023'],
        'educational_results': ['3.8'],
        'result_types': ['GPA'],
        'major_field_of_studies': ['Business Data Science'],

        # Work experience (parallel lists, one entry per position)
        'professional_company_names': ['TechCorp', 'DataHub', 'AI Solutions'],
        'company_urls': ['techcorp.com', 'datahub.io', 'aisolutions.ai'],
        'start_dates': ['Jan 2021', 'Jun 2019', 'Jan 2018'],
        'end_dates': ['Current', 'Dec 2020', 'May 2019'],
        'positions': ['Data Scientist', 'ML Engineer', 'Data Analyst'],
        'locations': ['Copenhagen, Denmark', 'Aalborg, Denmark', 'Aarhus, Denmark'],
        # NOTE(review): whitespace inside this literal is part of the value.
        'responsibilities': """
        • Developed ML models for customer segmentation
        • Built NLP pipeline for sentiment analysis
        • Deployed models to production using AWS
        • Collaborated with cross-functional teams
        • Mentored junior data scientists
        """,

        # Languages and certifications (parallel lists)
        'languages': ['English', 'Danish', 'Portuguese'],
        'proficiency_levels': ['Fluent', 'Native', 'Native'],
        'certification_providers': ['AWS', 'Google Cloud', 'Coursera'],
        'certification_skills': ['AWS ML Specialty', 'GCP Data Engineer', 'Deep Learning'],

        # Career goals
        'career_objective': 'Seeking senior data science role focusing on NLP and LLM applications',
        'job_position_name': 'Senior Data Scientist / ML Engineer',

        'matched_score': 0.85,

        # Free-text summary of the profile above.
        # NOTE(review): whitespace inside this literal is part of the value.
        'text': """
        Skills: Python, Machine Learning, Data Science, SQL, TensorFlow, Pandas, NumPy,
        Scikit-learn, Deep Learning, NLP, Computer Vision, AWS, Docker, Git, Agile.

        Education: Master of Science in Business Data Science from Technical University of Denmark (2023).

        Experience: Data Scientist at TechCorp (Current), ML Engineer at DataHub, Data Analyst at AI Solutions.
        Specialized in ML model development, NLP, and production deployment.

        Languages: English (Fluent), Danish (Native), Portuguese (Native).

        Certifications: AWS ML Specialty, GCP Data Engineer, Deep Learning.
        """
    }

    return candidate
|
|
|
|
|
|
|
|
def get_company_matches(candidate_id: int = 0, top_k: int = 10) -> List[Tuple[int, float, Dict[str, Any]]]:
    """
    Get top company matches for a candidate.

    The mock returns the same hardcoded list of ten companies for every
    candidate; the list is written in descending ``similarity_score`` order,
    so taking its first ``top_k`` entries yields the best matches.

    Args:
        candidate_id: Candidate identifier. Not used by the mock; kept so the
            signature matches the real loader named in the module docstring.
        top_k: Number of top matches to return. Values above the number of
            available companies return all of them; zero or negative values
            return an empty list.

    Returns:
        List of tuples: (company_id, similarity_score, company_data)
    """

    companies = [
        {
            'id': 29286,
            'name': 'Anblicks',
            'similarity_score': 0.7028,
            'description': 'Leading data analytics and AI consulting firm specializing in cloud-native solutions',
            'industries_list': 'Information Technology, Data Analytics, Cloud Computing',
            'specialties_list': 'Big Data | Machine Learning | Cloud Architecture | Data Engineering',
            'employee_count': '500-1000',
            'city': 'San Francisco',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Machine Learning | AWS | TensorFlow | Data Science | SQL | Spark',
            'posted_job_titles': 'Senior Data Scientist | ML Engineer | Data Architect',
            'experience_levels': 'Mid-Senior level | Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Technology company seeking ML experts with Python, AWS, and production experience...'
        },
        {
            'id': 15234,
            'name': 'iO Associates - US',
            'similarity_score': 0.7026,
            'description': 'Global talent solutions provider connecting tech professionals with innovative companies',
            'industries_list': 'Staffing and Recruiting, Technology',
            'specialties_list': 'Data Science Recruitment | AI/ML Placement | Tech Consulting',
            'employee_count': '1000-5000',
            'city': 'New York',
            'state': 'NY',
            'country': 'USA',
            'required_skills': 'Python | Data Science | Machine Learning | Deep Learning | NLP',
            'posted_job_titles': 'Data Scientist | AI Engineer | Research Scientist',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Contract',
            'text': 'Recruiting firm specializing in data science and AI talent placement...'
        },
        {
            'id': 8721,
            'name': 'DATAECONOMY',
            'similarity_score': 0.6849,
            'description': 'Data platform company building next-gen analytics solutions',
            'industries_list': 'Computer Software, Big Data',
            'specialties_list': 'Data Analytics | Business Intelligence | ETL | Data Warehousing',
            'employee_count': '200-500',
            'city': 'Boston',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'SQL | Python | Data Modeling | ETL | Tableau | AWS',
            'posted_job_titles': 'Data Engineer | Analytics Engineer | BI Developer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Building data infrastructure and analytics platforms...'
        },
        {
            'id': 12983,
            'name': 'Datavail',
            'similarity_score': 0.6827,
            'description': 'Database and data management services company',
            'industries_list': 'Information Technology, Database Management',
            'specialties_list': 'Database Administration | Cloud Migration | Performance Tuning',
            'employee_count': '500-1000',
            'city': 'Denver',
            'state': 'CO',
            'country': 'USA',
            'required_skills': 'SQL | Database Design | Python | Cloud Platforms | Performance Optimization',
            'posted_job_titles': 'Database Engineer | Data Platform Engineer | Cloud DBA',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Specialized in database management and cloud data solutions...'
        },
        {
            'id': 45672,
            'name': 'BitPusher',
            'similarity_score': 0.6776,
            'description': 'Software development and IT consulting firm',
            'industries_list': 'Computer Software, IT Services',
            'specialties_list': 'Custom Software Development | Cloud Solutions | DevOps',
            'employee_count': '50-200',
            'city': 'Austin',
            'state': 'TX',
            'country': 'USA',
            'required_skills': 'Python | JavaScript | AWS | Docker | Kubernetes | CI/CD',
            'posted_job_titles': 'Software Engineer | DevOps Engineer | Full Stack Developer',
            'experience_levels': 'Entry level | Mid level',
            'work_types': 'Full-time',
            'text': 'Building custom software solutions for enterprise clients...'
        },
        {
            'id': 33421,
            'name': 'Neural Dynamics',
            'similarity_score': 0.6654,
            'description': 'AI research lab focused on neural networks and deep learning',
            'industries_list': 'Research, Artificial Intelligence',
            'specialties_list': 'Deep Learning | Computer Vision | NLP | Reinforcement Learning',
            'employee_count': '100-200',
            'city': 'Seattle',
            'state': 'WA',
            'country': 'USA',
            'required_skills': 'PyTorch | TensorFlow | Deep Learning | Computer Vision | Research',
            'posted_job_titles': 'Research Scientist | ML Researcher | AI Engineer',
            'experience_levels': 'Senior level | Lead',
            'work_types': 'Full-time | Onsite',
            'text': 'Cutting-edge AI research in neural networks and applications...'
        },
        {
            'id': 28945,
            'name': 'CloudScale Analytics',
            'similarity_score': 0.6543,
            'description': 'Cloud-native data analytics platform',
            'industries_list': 'Cloud Computing, Analytics',
            'specialties_list': 'Cloud Analytics | Real-time Processing | Data Pipelines',
            'employee_count': '200-500',
            'city': 'San Jose',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'AWS | Python | Spark | Kafka | Data Engineering | Distributed Systems',
            'posted_job_titles': 'Data Engineer | Platform Engineer | Solutions Architect',
            'experience_levels': 'Mid-Senior level',
            'work_types': 'Full-time | Remote',
            'text': 'Building scalable data analytics infrastructure in the cloud...'
        },
        {
            'id': 19283,
            'name': 'DataForge Labs',
            'similarity_score': 0.6421,
            'description': 'ML operations and MLOps platform provider',
            'industries_list': 'Machine Learning, DevOps',
            'specialties_list': 'MLOps | Model Deployment | ML Infrastructure | Monitoring',
            'employee_count': '50-100',
            'city': 'Palo Alto',
            'state': 'CA',
            'country': 'USA',
            'required_skills': 'Python | Docker | Kubernetes | ML Deployment | Monitoring Tools',
            'posted_job_titles': 'MLOps Engineer | Platform Engineer | DevOps Engineer',
            'experience_levels': 'Mid level | Mid-Senior level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Helping companies deploy and manage ML models at scale...'
        },
        {
            'id': 51234,
            'name': 'InsightAI',
            'similarity_score': 0.6312,
            'description': 'Business intelligence and predictive analytics company',
            'industries_list': 'Business Intelligence, Predictive Analytics',
            'specialties_list': 'Forecasting | Predictive Modeling | BI Tools | Dashboards',
            'employee_count': '100-200',
            'city': 'Chicago',
            'state': 'IL',
            'country': 'USA',
            'required_skills': 'Python | R | Tableau | PowerBI | Statistical Modeling | SQL',
            'posted_job_titles': 'Data Analyst | BI Developer | Analytics Engineer',
            'experience_levels': 'Mid level',
            'work_types': 'Full-time | Hybrid',
            'text': 'Providing predictive analytics and BI solutions for enterprises...'
        },
        {
            'id': 67821,
            'name': 'QuantumLeap Technologies',
            'similarity_score': 0.6198,
            'description': 'Quantum computing and advanced algorithms research',
            'industries_list': 'Quantum Computing, Research',
            'specialties_list': 'Quantum Algorithms | High-Performance Computing | Cryptography',
            'employee_count': '50-100',
            'city': 'Cambridge',
            'state': 'MA',
            'country': 'USA',
            'required_skills': 'Python | Quantum Computing | Linear Algebra | Algorithms | Research',
            'posted_job_titles': 'Quantum Research Scientist | Algorithm Engineer | Research Engineer',
            'experience_levels': 'Senior level | PhD level',
            'work_types': 'Full-time | Onsite',
            'text': 'Pioneering quantum computing applications and algorithms...'
        }
    ]

    # A negative slice bound counts from the end of the list (e.g. [: -1]
    # would return nine companies), so clamp the limit to zero to make
    # "no matches requested" return an empty list.
    limit = max(top_k, 0)

    matches = [
        (comp['id'], comp['similarity_score'], comp)
        for comp in companies[:limit]
    ]

    return matches
|
|
|
|
|
|
|
|
def get_network_graph_data(candidate_id: int = 0, top_k: int = 10) -> Dict[str, Any]:
    """
    Build the node and edge lists for the candidate-to-company match graph.

    The graph has one node for the candidate, one node per matched company,
    and one edge from the candidate to each company.

    Args:
        candidate_id: Candidate identifier
        top_k: Number of companies to include

    Returns:
        Dictionary with two keys: 'nodes' (candidate node first, then the
        company nodes in match order) and 'edges' (one per company)
    """

    profile = get_candidate_data(candidate_id)
    ranked = get_company_matches(candidate_id, top_k)

    # The candidate node id is also the 'from' endpoint of every edge.
    source_id = f'C{candidate_id}'

    candidate_node = {
        'id': source_id,
        'label': f"Candidate #{candidate_id}",
        'title': profile['name'],
        'color': '#00FF00',
        'shape': 'dot',
        'size': 25
    }

    company_nodes = [
        {
            'id': f'J{company_id}',
            # Labels are cut to 20 characters; the full name is in 'title'.
            'label': company['name'][:20],
            'title': f"{company['name']}\nScore: {similarity:.4f}",
            'color': '#FF0000',
            'shape': 'square',
            # Node size grows linearly with the similarity score.
            'size': 15 + (similarity * 20)
        }
        for company_id, similarity, company in ranked
    ]

    links = [
        {
            'from': source_id,
            'to': f'J{company_id}',
            'value': similarity,
            'title': f'Match Score: {similarity:.4f}',
            'color': {'opacity': similarity}
        }
        for company_id, similarity, _company in ranked
    ]

    return {
        'nodes': [candidate_node, *company_nodes],
        'edges': links
    }
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Smoke test: exercise each public function once and print a summary.
    candidate = get_candidate_data(0)
    print("✅ Candidate: " + str(candidate['name']))

    matches = get_company_matches(0, 5)
    print("✅ Top 5 matches loaded")

    graph_data = get_network_graph_data(0, 5)
    node_count = len(graph_data['nodes'])
    edge_count = len(graph_data['edges'])
    print("✅ Graph data: {} nodes, {} edges".format(node_count, edge_count))
|
|
|