File size: 2,683 Bytes
3a4750b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import pandas as pd
import random
import ast
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
import lightgbm as lgb
import pickle


# Load the raw freelancer and job tables from CSV files in the working directory.
freelancers_df = pd.read_csv("freelancers.csv")
jobs_df = pd.read_csv("jobs.csv")

# The skill columns arrive as text; each cell is parsed with ast.literal_eval
# so later steps work with real Python objects (presumably lists of skill
# names -- confirm against the CSV contents) instead of their string form.
for frame, skill_column in (
    (freelancers_df, "Skills"),
    (jobs_df, "Required_Skills"),
):
    frame[skill_column] = frame[skill_column].map(ast.literal_eval)


# Encode the skill collections as binary indicator matrices.
#
# The binarizer learns its vocabulary from the freelancers' skills only; the
# job requirements are then encoded against that same vocabulary, so both
# matrices share the column order given by mlb.classes_.
# NOTE(review): a required skill that no freelancer lists is not part of the
# fitted vocabulary and therefore gets no column -- confirm this is intended.
mlb = MultiLabelBinarizer()

freelancer_skills = mlb.fit_transform(freelancers_df["Skills"])

job_skills = mlb.transform(jobs_df["Required_Skills"])


# Combine the encoded features back into the dataframes.

# Freelancer-side feature table: one indicator column per known skill,
# followed by the profile attributes and the ID that serves as merge key.
freelancer_features = pd.DataFrame(
    freelancer_skills,
    columns=[f"FSkill_{skill}" for skill in mlb.classes_],
)
for column in ("Hourly_Rate", "Rating", "Completed_Projects", "Freelancer_ID"):
    # Column assignment aligns on the row index shared with freelancers_df.
    freelancer_features[column] = freelancers_df[column]

# Job-side feature table, built the same way from the job skill matrix.
job_features = pd.DataFrame(
    job_skills,
    columns=[f"JSkill_{skill}" for skill in mlb.classes_],
)
for column in ("Budget", "Duration_Days", "Job_ID"):
    job_features[column] = jobs_df[column]


# Job-Freelancer interactions dataframe
#
# The labels are synthetic: every job receives a random sample of candidate
# freelancers, and exactly one of them (picked at random) is marked as hired.

# Upper bound on the number of candidates drawn per job.
CANDIDATES_PER_JOB = 20

# Dedicated, seeded generator so repeated runs produce the same training data
# (the ranker below is seeded as well) without touching the global RNG state.
rng = random.Random(42)

# Built once; the ID list does not change between jobs.
freelancer_ids = list(freelancers_df.Freelancer_ID)
if not freelancer_ids:
    raise ValueError("No freelancers available to build interactions from.")

# random.sample raises ValueError when asked for more items than exist, so
# cap the sample size for small freelancer tables.
sample_size = min(CANDIDATES_PER_JOB, len(freelancer_ids))

interactions = []

for job in jobs_df.itertuples():
    selected_freelancers = rng.sample(freelancer_ids, sample_size)
    hired = rng.choice(selected_freelancers)
    interactions.extend(
        {
            "Job_ID": job.Job_ID,
            "Freelancer_ID": freelancer_id,
            "Is_Hired": int(freelancer_id == hired),
        }
        for freelancer_id in selected_freelancers
    )

# Explicit columns keep the frame mergeable even when there are no jobs.
interactions_df = pd.DataFrame(
    interactions,
    columns=["Job_ID", "Freelancer_ID", "Is_Hired"],
)


# Merge and build training set
#
# Each interaction row is joined with its job features and its freelancer
# features. The rows are then ordered by Job_ID so that all candidates of one
# job are contiguous: LightGBM interprets `group` as consecutive query sizes
# in row order, so the row order and the group-size order must agree.
merged_df = (
    interactions_df
    .merge(job_features, on="Job_ID")
    .merge(freelancer_features, on="Freelancer_ID")
    .sort_values("Job_ID", kind="stable")  # stable: keep within-job order
    .reset_index(drop=True)
)


# Feature matrix (identifiers and the label removed) and the label vector.
X = merged_df.drop(columns=["Job_ID", "Freelancer_ID", "Is_Hired"])
y = merged_df["Is_Hired"]


# Number of candidate rows per job, listed in the same order in which the
# jobs appear in merged_df (ascending Job_ID after the sort above).
job_group = merged_df.groupby("Job_ID", sort=False).size().to_list()


# Scale features
#
# The scaler is fitted on every column of X (the binary skill indicators as
# well as the numeric attributes) and then applied to that same matrix.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


# Model training
#
# LambdaRank objective evaluated with NDCG; `job_group` tells the ranker how
# many consecutive rows belong to each job.
ranker_params = {
    "objective": "lambdarank",
    "metric": "ndcg",
    "learning_rate": 0.1,
    "n_estimators": 100,
    "random_state": 42,
}
ranker = lgb.LGBMRanker(**ranker_params)

ranker.fit(X_scaled, y, group=job_group)


# Persist the freelancer table and the fitted objects as pickle files in the
# working directory, written in the order listed here.
artifacts = {
    "freelancers.pkl": freelancers_df,
    "mlb.pkl": mlb,
    "scaler.pkl": scaler,
    "ranker_model.pkl": ranker,
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, "wb") as f:
        pickle.dump(artifact, f)

print("Model training and data saving completed.")