# Source page header (from the web file viewer this script was copied from):
#   Spaces: Sleeping
#   File size: 2,683 Bytes
#   3a4750b
# The viewer's line-number gutter (1-103) was extraction residue and is omitted.
import pandas as pd
import random
import ast
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
import lightgbm as lgb
import pickle
# --- Load raw data ----------------------------------------------------------
# Both CSV files are read from the current working directory.
freelancers_df = pd.read_csv("freelancers.csv")
jobs_df = pd.read_csv("jobs.csv")

# The skill columns are stored as text and parsed with ast.literal_eval, so
# every cell must contain a valid Python literal.
# NOTE(review): presumably each cell is a list of skill names -- confirm
# against the CSV files.
freelancers_df["Skills"] = freelancers_df["Skills"].apply(ast.literal_eval)
jobs_df["Required_Skills"] = jobs_df["Required_Skills"].apply(ast.literal_eval)

# --- Encode skills as binary indicator columns ------------------------------
# The binarizer vocabulary is learned from the freelancer skills only and then
# reused for the jobs, so both frames get one column per class in the same
# order.
mlb = MultiLabelBinarizer()
freelancer_skills = mlb.fit_transform(freelancers_df["Skills"])
# NOTE(review): a required skill that no freelancer lists is not part of the
# fitted vocabulary; scikit-learn warns and ignores such labels in transform().
job_skills = mlb.transform(jobs_df["Required_Skills"])

# --- Combine the encoded skills with the remaining columns ------------------
# Column order matters: it determines the feature order seen by the scaler and
# the ranker further down.
freelancer_features = pd.DataFrame(
    freelancer_skills,
    columns=[f"FSkill_{s}" for s in mlb.classes_],
)
freelancer_features["Hourly_Rate"] = freelancers_df["Hourly_Rate"]
freelancer_features["Rating"] = freelancers_df["Rating"]
freelancer_features["Completed_Projects"] = freelancers_df["Completed_Projects"]
freelancer_features["Freelancer_ID"] = freelancers_df["Freelancer_ID"]

job_features = pd.DataFrame(
    job_skills,
    columns=[f"JSkill_{s}" for s in mlb.classes_],
)
job_features["Budget"] = jobs_df["Budget"]
job_features["Duration_Days"] = jobs_df["Duration_Days"]
job_features["Job_ID"] = jobs_df["Job_ID"]
# --- Job-Freelancer interactions dataframe ----------------------------------
# For every job, draw a random slate of candidate freelancers and mark exactly
# one of them as hired. The labels are randomly generated here, not read from
# any recorded hiring data.
CANDIDATES_PER_JOB = 20
SAMPLING_SEED = 42  # same value the ranker uses for random_state

# The candidate pool is identical for every job, so build it once instead of
# on every loop iteration.
freelancer_ids = freelancers_df["Freelancer_ID"].tolist()
if not freelancer_ids:
    raise ValueError("freelancers.csv contains no freelancers to sample from")

# Cap the slate size: random.sample() raises ValueError when asked for more
# items than the population holds.
slate_size = min(CANDIDATES_PER_JOB, len(freelancer_ids))

# A dedicated seeded generator makes the sampled training data reproducible
# without touching the global `random` state.
rng = random.Random(SAMPLING_SEED)

interactions = []
for job_id in jobs_df["Job_ID"]:
    selected_freelancers = rng.sample(freelancer_ids, slate_size)
    hired = rng.choice(selected_freelancers)
    interactions.extend(
        {
            "Job_ID": job_id,
            "Freelancer_ID": f,
            "Is_Hired": int(f == hired),
        }
        for f in selected_freelancers
    )

# Explicit columns keep the frame mergeable even when there are no jobs (an
# empty list would otherwise produce a frame without any columns).
interactions_df = pd.DataFrame(
    interactions,
    columns=["Job_ID", "Freelancer_ID", "Is_Hired"],
)
# --- Merge and build training set -------------------------------------------
# Attach job features and freelancer features to every interaction row.
merged_df = (
    interactions_df
    .merge(job_features, on="Job_ID")
    .merge(freelancer_features, on="Freelancer_ID")
    # LightGBM reads `group` positionally: the first group size covers the
    # first rows, and so on. A stable sort by Job_ID makes every job's rows
    # contiguous and puts them in the same order that groupby() reports below.
    .sort_values("Job_ID", kind="mergesort")
    .reset_index(drop=True)
)

# Identifiers and the label are not model inputs.
X = merged_df.drop(columns=["Job_ID", "Freelancer_ID", "Is_Hired"])
y = merged_df["Is_Hired"]

# Number of candidate rows per job, in sorted Job_ID order (matches the row
# order established above).
job_group = merged_df.groupby("Job_ID", sort=True).size().to_list()

# --- Scale features ---------------------------------------------------------
# The scaler is fitted on every feature column (skill indicators included).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- Model training ---------------------------------------------------------
ranker = lgb.LGBMRanker(
    objective="lambdarank",
    metric="ndcg",
    learning_rate=0.1,
    n_estimators=100,
    random_state=42,
)
ranker.fit(X_scaled, y, group=job_group)
# --- Save files -------------------------------------------------------------
# Persist the parsed freelancer table together with the fitted binarizer,
# scaler and ranker, one pickle file per object, in the working directory.
# NOTE: pickle files must only ever be loaded from a trusted source.
artifacts = {
    "freelancers.pkl": freelancers_df,
    "mlb.pkl": mlb,
    "scaler.pkl": scaler,
    "ranker_model.pkl": ranker,
}
for filename, artifact in artifacts.items():
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)

print("Model training and data saving completed.")