noa151 committed on
Commit
1aa5646
·
verified ·
1 Parent(s): 046cded

upload everything I think I need

Browse files
accepted_submissions_regression_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:233cbb79b6757e1bda0f2d265df304a34c03bfc8a852de36cc854318e4749459
3
+ size 1030
best_model_related_topics_info.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8354b3e898fa16f5e5645c26c07934e5734184c801b33419e0ea07a7d08845c
3
+ size 810
best_related_topics_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c831b6a3d0b772f7773cb1cd35cd6effe6c1c1cd07a754052a741d59aa5942
3
+ size 28961909
dislikes_XGB_regression_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9396df7469ca204b1b808bb266149863efeab0f84e0f4c334e4596b93c24863f
3
+ size 361090
gradio.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import gradio as gr
3
+ import joblib
4
+ import pandas as pd
5
+ from related_topics_prediction import MultiLabelThresholdOptimizer
6
+
7
+
8
def convert_to_float(value):
    """Convert an abbreviated count such as ``"1.5K"`` or ``"2M"`` to a float.

    Args:
        value: A number, or a string holding a number optionally marked with
            ``K`` (thousands) or ``M`` (millions). Surrounding whitespace,
            thousands separators (``,``) and lowercase markers are accepted.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the text left after normalisation is not a valid number
            (for example an empty string).
    """
    # Numeric input has no suffix to strip; `'K' in value` would raise TypeError.
    if isinstance(value, (int, float)):
        return float(value)

    text = str(value).strip().replace(',', '').upper()
    if 'K' in text:
        return float(text.replace('K', '')) * 1_000
    if 'M' in text:
        return float(text.replace('M', '')) * 1_000_000
    return float(text)  # Plain number without a magnitude marker
14
+
15
+
16
def convert_to_string(value):
    """Format a count compactly: one decimal with ``M``/``K``, else a plain integer.

    Values of at least one million are shown in millions, values of at least
    one thousand in thousands, and anything smaller is truncated to an integer.
    """
    # Largest unit first so that millions are never reported as thousands.
    for unit_size, unit_suffix in ((1_000_000, "M"), (1_000, "K")):
        if value >= unit_size:
            scaled = value / unit_size
            return f"{scaled:.1f}{unit_suffix}"
    whole = int(value)
    return str(whole)
22
+
23
+
24
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def greet(title, description, difficulty, topics, likes, accepted, submission, comments, is_premium, predict):
    """Build a one-row feature frame from the form inputs and run the requested model.

    Args:
        title: Question title.
        description: Question text; vectorised with a stored TF-IDF vectorizer.
        difficulty: ``"Easy"``, ``"Hard"`` or anything else (encoded 0, 1, 2).
        topics: Selected related topics, either a list or a comma-separated
            string. ``None`` is treated as no topics.
        likes: Likes count, parsed by ``convert_to_float``.
        accepted: Accepted-submissions count, parsed by ``convert_to_float``.
        submission: Total-submissions count, parsed by ``convert_to_float``.
        comments: Discussion count, parsed with ``float``.
        is_premium: ``"premium"`` marks the question as premium.
        predict: One of ``"related topics"``, ``"difficulty level"``,
            ``"acceptance"``, ``"number of likes"``, ``"number of dislikes"``.

    Returns:
        A human-readable sentence with the prediction, or a short hint when
        ``predict`` is not one of the supported targets.

    Raises:
        ValueError: If a numeric field cannot be parsed.
    """
    # Parse the numeric inputs once instead of re-parsing them per feature.
    likes_value = convert_to_float(likes)
    accepted_value = convert_to_float(accepted)
    submissions_value = convert_to_float(submission)

    if isinstance(topics, str):
        selected_topics = topics.split(',')
    else:
        # A multiselect with nothing chosen may hand over None.
        selected_topics = list(topics) if topics else []

    x_new = pd.DataFrame([{
        'id': 1,
        'title': str(title),
        'description': str(description),
        'is_premium': 1 if is_premium == "premium" else 0,
        'difficulty': 0 if difficulty == "Easy" else 1 if difficulty == "Hard" else 2,
        # Guard against zero submissions, which used to raise ZeroDivisionError.
        'acceptance_rate': _safe_ratio(accepted_value, submissions_value),
        'frequency': 0,
        'discuss_count': float(comments),
        'accepted': accepted_value,
        'submissions': submissions_value,
        'companies': [""],
        'related_topics': selected_topics,
        'likes': likes_value,
        'dislikes': 0,
        # Dislikes are unknown at this point, so the rating is likes / likes
        # (0.0 when there are no likes, instead of a ZeroDivisionError).
        'rating': _safe_ratio(likes_value, likes_value + 0),
        'asked_by_faang': 0,
        'similar_questions': ""
    }])

    # Multi-hot encoding for companies
    row_companies = x_new["companies"].iloc[0]
    company_data = {company: 1 if company in row_companies else 0 for company in companies_columns}
    x_new = pd.concat([x_new, pd.DataFrame([company_data])], axis=1)
    x_new = x_new.drop(columns=["companies"])  # Drop original column

    # Multi-hot encoding for topics
    row_topics = x_new["related_topics"].iloc[0]
    topic_data = {topic: 1 if topic in row_topics else 0 for topic in the_topics}
    x_new = pd.concat([x_new, pd.DataFrame([topic_data])], axis=1)
    x_new = x_new.drop(columns=["related_topics"])  # Drop original topics column

    # Label encode 'title'. Re-fitting the stored encoder on this single row
    # throws away the stored mapping and always yields 0, so use the stored
    # mapping when the title is known.
    # NOTE(review): assumes the pickle is an sklearn-style label encoder whose
    # transform raises ValueError for unseen labels - confirm. Unseen titles
    # keep the previous behaviour (re-fit, which encodes them as 0).
    title_model = joblib.load("title_encoder.pkl")
    try:
        x_new['title'] = title_model.transform(x_new['title'])
    except ValueError:
        x_new['title'] = title_model.fit_transform(x_new['title'])

    if predict == "related topics":
        vectorizer = joblib.load("related_topics_vectorizer.pkl")
        new_tfidf = vectorizer.transform(x_new["description"])

        best_model_info = joblib.load('best_model_related_topics_info.pkl')
        best_model = joblib.load("best_related_topics_model.pkl")
        model_name = best_model_info['model_name']
        optimizer = MultiLabelThresholdOptimizer()
        optimizer.optimal_thresholds[model_name] = best_model_info['threshold']

        predictions = optimizer.predict(best_model, new_tfidf, model_name)

        mlb = joblib.load("related_topics_label_binarizer.pkl")
        predictions = mlb.inverse_transform(predictions)

        # Joined outside the f-string: reusing the same quote character inside
        # an f-string expression is a SyntaxError before Python 3.12.
        topics_text = ", ".join(map(str, predictions[0]))
        return f"the related topics are: {topics_text}"

    vectorizer = joblib.load("tfidf_vectorizer.pkl")
    new_tfidf = vectorizer.transform(x_new["description"])

    # Append the TF-IDF features as columns and drop the raw text
    new_tfidf_df = pd.DataFrame(new_tfidf.toarray(), columns=vectorizer.get_feature_names_out())
    x_new = pd.concat([x_new, new_tfidf_df], axis=1)
    x_new = x_new.drop(columns=['description'])

    if predict == "difficulty level":
        # Load the dislike model because there is no dislike in the input
        dislikes_model, feature_names = joblib.load("dislikes_XGB_regression_model.pkl")

        x_new_filtered = x_new[feature_names]  # Select only the required features
        dislike = dislikes_model.predict(x_new_filtered)
        x_new['dislikes'] = dislike[0]
        # This was written with ':' (a bare annotation), so the rating was
        # never actually updated with the predicted dislikes.
        x_new['rating'] = _safe_ratio(likes_value, likes_value + dislike[0])

        class_model = joblib.load("level_classifier_model.pkl")

        # Feature names the classifier pipeline was trained on
        trained_feature_names = class_model.named_steps['standardscaler'].get_feature_names_out()

        # Add any missing trained column as 0 *before* selecting; selecting
        # first raises KeyError and makes the fill unreachable.
        missing = [col for col in trained_feature_names if col not in x_new.columns]
        if missing:
            filler = pd.DataFrame(0, index=x_new.index, columns=missing)
            x_new = pd.concat([x_new, filler], axis=1)

        x_new = x_new[trained_feature_names]  # Reorder and remove extra columns

        predictions = class_model.predict(x_new)

        # Same encoding as the 'difficulty' feature above.
        level_names = {0: "Easy", 1: "Hard", 2: "Medium"}
        predicted_code = predictions[0]
        # Fall back to the raw code rather than failing on an unbound name.
        prediction = level_names.get(predicted_code, str(predicted_code))

        return f"the level difficulty is: {prediction}"

    # The three regression targets share one flow: load (model, feature_names),
    # select the features, predict, and format the first prediction.
    regression_targets = {
        "acceptance": ("accepted_submissions_regression_model.pkl", "the accepted is: "),
        "number of likes": ("likes_random_forest_regression_model.pkl", "the likes amount is: "),
        "number of dislikes": ("dislikes_XGB_regression_model.pkl", "the dislikes amount is: "),
    }
    if predict in regression_targets:
        model_path, answer_prefix = regression_targets[predict]
        model, feature_names = joblib.load(model_path)

        x_new_filtered = x_new[feature_names]  # Select only the required features
        predictions = model.predict(x_new_filtered)

        return f"{answer_prefix}{convert_to_string(predictions[0])}"

    # No (or an unknown) target selected: say so instead of returning None.
    return "please choose what you would like to predict"
161
+
162
+
163
# NOTE(review): this diff adds the script as ``gradio.py``. A module with that
# name shadows the ``gradio`` package when run from its own directory, so
# ``import gradio as gr`` may resolve to this file - consider renaming it
# (e.g. ``app.py``).

# Column names produced by the training-time multi-hot encoding.
with open("encoding_metadata.json", "r", encoding="utf-8") as f:
    encoding_metadata = json.load(f)

# Drop the empty-string placeholder column. Filtering (rather than
# ``list.remove("")``) does not raise when the placeholder is absent and
# removes every occurrence.
the_topics = [topic for topic in encoding_metadata["related_topics_columns"] if topic != ""]
companies_columns = [company for company in encoding_metadata["companies_columns"] if company != ""]

# The order of ``inputs`` must match the positional parameters of ``greet``.
demo = gr.Interface(
    fn=greet,
    inputs=[gr.Text(label="Title"), gr.Text(label="Description"),
            gr.Radio(choices=["Easy", "Medium", "Hard"], label="Difficulty Level"),
            gr.Dropdown(the_topics, multiselect=True, label="Related Topics",
                        info="choose all the related topics of this question"),
            gr.Text(label="Likes Amount"),
            gr.Text(label="Accepted Amount"),
            gr.Text(label="Submission Amount"),
            gr.Text(label="Comments Amount"),
            gr.Radio(choices=["premium", "not premium"], label="Is Premium"),
            gr.Radio(choices=["acceptance", "difficulty level", "number of likes", "number of dislikes",
                              "related topics"], label="Please Predict..")
            ],
    outputs=[gr.Text(label="The Prediction")],
    title="LEETCODE PREDICTOR",
    description="please go to the leetcode website (https://leetcode.com/) choose a question and copy the question's detiles to the relevant spaces, then choose what you whould like to predict and submit. the prediction result will appear on the right side of the screen 😉"
)

demo.launch()
level_classifier_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a7dab2b693d942ad00cab4ab8654d64ac24518de1bc8482efd318e0143d35e5
3
+ size 8753130
likes_random_forest_regression_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb18f7bc8d6f75d4ef5477d9459ac76eacb09930e3a1222a0c22d1ec67d2ffe
3
+ size 9841053
related_topics_label_binarizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e79aeb74cb00981f72fe408084d83bed5195a2c21533abac001de5c789b3d96
3
+ size 2091
related_topics_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec6f8115762b9092ece70cfd7d0900fec7da080b8deba5f0b13565e4573acac1
3
+ size 185124
requirements.txt ADDED
Binary file (8.16 kB). View file
 
tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a93e2bc402f747446b3054913db329ca4206c83e547113f6dc222e9b4d91c11
3
+ size 4008
title_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7625a0bc0320b6a0ccb392dfbe631d9a2da81dbb25cb3fcc590e98bdb1fbfc0
3
+ size 67440