|
import numpy as np |
|
import pandas as pd |
|
from sklearn.feature_extraction.text import TfidfVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
import json |
|
|
|
def recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, companies, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vec, companies_majors_vec, top_n=3):
    """Return the ``Major`` values of the companies that best match the input.

    The hard- and soft-skill strings are vectorized with
    ``tfidf_vectorizer_skills`` and averaged into a single skills vector; the
    major string is vectorized with ``tfidf_vectorizer_majors``.  Each vector
    is compared against the corresponding company matrix with cosine
    similarity, the two similarity rows are averaged, and companies are
    ranked by that combined score in descending order.

    Args:
        input_hard_skills: Text describing the candidate's hard skills.
        input_soft_skills: Text describing the candidate's soft skills.
        input_major: Text describing the candidate's major.
        companies: DataFrame with a ``'Major'`` column.  Row ``i`` is assumed
            to correspond to row ``i`` of both company matrices -- verify
            against the caller.
        tfidf_vectorizer_skills: Fitted vectorizer exposing ``transform``,
            used for both skill strings.
        tfidf_vectorizer_majors: Fitted vectorizer exposing ``transform``,
            used for the major string.
        companies_skills_vec: Company skill vectors, one row per company.
        companies_majors_vec: Company major vectors, one row per company.
        top_n: Maximum number of results to return.  Defaults to 3, the
            value that used to be hard-coded.

    Returns:
        Array holding the ``'Major'`` value of up to ``top_n`` best-scoring
        companies, best first.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    hard_vec = tfidf_vectorizer_skills.transform([input_hard_skills])
    soft_vec = tfidf_vectorizer_skills.transform([input_soft_skills])
    major_vec = tfidf_vectorizer_majors.transform([input_major])

    # Hard and soft skills contribute equally to the query vector.
    input_skills_vec = (hard_vec + soft_vec) / 2

    skills_similarity = cosine_similarity(input_skills_vec, companies_skills_vec)
    major_similarity = cosine_similarity(major_vec, companies_majors_vec)

    # The two company matrices may arrive with different row counts.  Keep
    # only the leading columns both similarity rows share so they can be
    # averaged element-wise; slicing is a no-op when the sizes already agree.
    shared = min(skills_similarity.shape[1], major_similarity.shape[1])
    skills_similarity = skills_similarity[:, :shared]
    major_similarity = major_similarity[:, :shared]

    combined_similarity = (skills_similarity + major_similarity) / 2

    # A stable sort keeps tied companies in their original row order, so the
    # result is reproducible regardless of the sort algorithm's internals.
    ranking = np.argsort(-combined_similarity[0], kind="stable")

    # Select only the rows that will be returned instead of reordering the
    # whole frame and slicing afterwards.
    return companies.iloc[ranking[:top_n]]['Major'].values
|
|
|
def _json_response(status_code, payload):
    """Build a response dict whose ``'body'`` is ``payload`` encoded as JSON."""
    return {
        'statusCode': status_code,
        'body': json.dumps(payload)
    }


def handler(event, context):
    """Recommend majors for the skills and major supplied in the request body.

    NOTE(review): the ``(event, context)`` signature and the
    ``statusCode``/``body`` response look like an AWS Lambda proxy
    integration -- confirm against the deployment.

    The applicant and job CSV files are re-read and both TF-IDF vectorizers
    are refit on every call.

    Args:
        event: Mapping whose ``'body'`` entry is a JSON string holding the
            string fields ``input_hard_skills``, ``input_soft_skills`` and
            ``input_major``.
        context: Unused.

    Returns:
        A dict with ``'statusCode'`` 200 and a JSON list of recommendations
        in ``'body'``, or ``'statusCode'`` 400 and a JSON error object when
        the request body is missing, malformed or incomplete.
    """
    try:
        input_data = json.loads(event['body'])
        input_hard_skills = input_data["input_hard_skills"]
        input_soft_skills = input_data["input_soft_skills"]
        input_major = input_data["input_major"]
    except (KeyError, TypeError, ValueError):
        # KeyError: missing body/field; TypeError: body is None or the JSON
        # is not an object; ValueError: body is not valid JSON.
        return _json_response(400, {
            'error': 'Request body must be a JSON object with '
                     'input_hard_skills, input_soft_skills and input_major.'
        })

    # The vectorizers tokenize text, so reject non-string fields up front.
    fields = (input_hard_skills, input_soft_skills, input_major)
    if not all(isinstance(value, str) for value in fields):
        return _json_response(400, {
            'error': 'input_hard_skills, input_soft_skills and input_major '
                     'must be strings.'
        })

    applicants = pd.read_csv("1st_train.csv")
    companies = pd.read_csv("jobs_data.csv")

    tfidf_vectorizer_skills = TfidfVectorizer()
    tfidf_vectorizer_majors = TfidfVectorizer()

    # Empty CSV cells are read as NaN, which the vectorizer refuses to
    # process; treat them as empty documents instead.
    all_skills = pd.concat([
        applicants['final_hard_skill'], applicants['final_soft_skill'],
        companies['final_hard_skill'], companies['final_soft_skill'],
    ]).fillna("").astype(str)
    all_majors = pd.concat(
        [applicants['candidate_field'], companies['Major']]
    ).fillna("").astype(str)

    skills_matrix = tfidf_vectorizer_skills.fit_transform(all_skills)
    majors_matrix = tfidf_vectorizer_majors.fit_transform(all_majors)

    num_applicants = len(applicants)
    num_companies = len(companies)

    # Row layout of skills_matrix follows the concat order above:
    #   [applicant hard | applicant soft | company hard | company soft]
    # Average the two company sections into one row per company, the same
    # way the input's hard and soft skills are combined, so that company
    # soft skills take part in the similarity score and the matrix lines up
    # row-for-row with the company majors matrix.
    company_start = 2 * num_applicants
    company_hard = skills_matrix[company_start:company_start + num_companies]
    company_soft = skills_matrix[company_start + num_companies:]
    companies_skills_vectorized = (company_hard + company_soft) / 2

    companies_majors_vectorized = majors_matrix[num_applicants:]

    recommended_jobs = recommend_jobs_for_input_skills(input_hard_skills, input_soft_skills, input_major, companies, tfidf_vectorizer_skills, tfidf_vectorizer_majors, companies_skills_vectorized, companies_majors_vectorized)

    return _json_response(200, recommended_jobs.tolist())
|
|