Spaces:

noa151
/

LeetCodePredictions

Sleeping

App Files Files Community

noa151 committed on Feb 4

Commit

1aa5646

verified ·

1 Parent(s): 046cded

upload everything I think I need

Browse files

Files changed (12) hide show

accepted_submissions_regression_model.pkl +3 -0
best_model_related_topics_info.pkl +3 -0
best_related_topics_model.pkl +3 -0
dislikes_XGB_regression_model.pkl +3 -0
gradio.py +190 -0
level_classifier_model.pkl +3 -0
likes_random_forest_regression_model.pkl +3 -0
related_topics_label_binarizer.pkl +3 -0
related_topics_vectorizer.pkl +3 -0
requirements.txt +0 -0
tfidf_vectorizer.pkl +3 -0
title_encoder.pkl +3 -0

accepted_submissions_regression_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:233cbb79b6757e1bda0f2d265df304a34c03bfc8a852de36cc854318e4749459
+size 1030

best_model_related_topics_info.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8354b3e898fa16f5e5645c26c07934e5734184c801b33419e0ea07a7d08845c
+size 810

best_related_topics_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3c831b6a3d0b772f7773cb1cd35cd6effe6c1c1cd07a754052a741d59aa5942
+size 28961909

dislikes_XGB_regression_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9396df7469ca204b1b808bb266149863efeab0f84e0f4c334e4596b93c24863f
+size 361090

gradio.py ADDED Viewed

	@@ -0,0 +1,190 @@

+import json
+import gradio as gr
+import joblib
+import pandas as pd
+from related_topics_prediction import MultiLabelThresholdOptimizer
+def convert_to_float(value):
+    if 'K' in value:
+        return float(value.replace('K', '')) * 1_000
+    elif 'M' in value:
+        return float(value.replace('M', '')) * 1_000_000
+    return float(value)  # If it's already a number
+def convert_to_string(value):
+    if value >= 1_000_000:
+        return f"{value / 1_000_000:.1f}M"
+    elif value >= 1_000:
+        return f"{value / 1_000:.1f}K"
+    return str(int(value))  # Keep it as an integer if it's below 1,000
+def greet(title, description, difficulty, topics, likes, accepted, submission, comments, is_premium, predict):
+    x_new = pd.DataFrame([{
+        'id': 1,
+        'title': str(title),
+        'description': str(description),
+        'is_premium': 1 if is_premium == "premium" else 0,
+        'difficulty': 0 if difficulty == "Easy" else 1 if difficulty == "Hard" else 2,
+        'acceptance_rate': convert_to_float(accepted)/convert_to_float(submission),
+        'frequency': 0,
+        'discuss_count': float(comments),
+        'accepted': convert_to_float(accepted),
+        'submissions': convert_to_float(submission),
+        'companies': [""],
+        'related_topics': topics.split(',') if isinstance(topics, str) else topics,
+        'likes': convert_to_float(likes),
+        'dislikes': 0,
+        'rating': convert_to_float(likes) / (convert_to_float(likes) + 0),
+        'asked_by_faang': 0,
+        'similar_questions': ""
+    }])
+    # Efficient Multi-Hot Encoding for Companies
+    company_data = {company: 1 if company in x_new["companies"].iloc[0] else 0 for company in companies_columns}
+    x_new = pd.concat([x_new, pd.DataFrame([company_data])], axis=1)
+    x_new = x_new.drop(columns=["companies"])  # Drop original column
+    # Efficient Multi-Hot Encoding for Topics
+    topic_data = {topic: 1 if topic in x_new["related_topics"].iloc[0] else 0 for topic in the_topics}
+    x_new = pd.concat([x_new, pd.DataFrame([topic_data])], axis=1)
+    x_new = x_new.drop(columns=["related_topics"])  # Drop original topics column
+    # Label encode 'title'
+    title_model = joblib.load("title_encoder.pkl")
+    x_new['title'] = title_model.fit_transform(x_new['title'])
+    if predict == "related topics":
+        vectorizer = joblib.load("related_topics_vectorizer.pkl")
+        new_tfidf = vectorizer.transform(x_new["description"])
+        best_model_info = joblib.load('best_model_related_topics_info.pkl')
+        best_model = joblib.load("best_related_topics_model.pkl")
+        optimizer = MultiLabelThresholdOptimizer()
+        optimizer.optimal_thresholds[best_model_info['model_name']] = best_model_info['threshold']
+        predictions = optimizer.predict(best_model, new_tfidf, best_model_info['model_name'])
+        mlb = joblib.load("related_topics_label_binarizer.pkl")
+        predictions = mlb.inverse_transform(predictions)
+        ans = f"the related topics are: {", ".join(map(str, predictions[0]))}"
+        return ans
+    else:
+        vectorizer = joblib.load("tfidf_vectorizer.pkl")
+        new_tfidf = vectorizer.transform(x_new["description"])
+        # Convert to DataFrame
+        new_tfidf_df = pd.DataFrame(new_tfidf.toarray(), columns=vectorizer.get_feature_names_out())
+        x_new = pd.concat([x_new, new_tfidf_df], axis=1)
+        x_new = x_new.drop(columns=['description'])
+        if predict == "difficulty level":
+            # load the dislike model because there is no dislike in the input
+            dislikes_model, feature_names = joblib.load("dislikes_XGB_regression_model.pkl")
+            x_new_filtered = x_new[feature_names]  # Select only the required features
+            dislike = dislikes_model.predict(x_new_filtered)
+            x_new['dislikes'] = dislike[0]
+            x_new['rating']: convert_to_float(likes) / (convert_to_float(likes) + dislike[0])
+            # Load the model
+            class_model = joblib.load("level_classifier_model.pkl")
+            # Get feature names from trained model
+            trained_feature_names = class_model.named_steps['standardscaler'].get_feature_names_out()
+            x_new = x_new[trained_feature_names]  # Reorder and remove extra columns
+            # Fill missing columns with 0 (or a suitable default)
+            for col in trained_feature_names:
+                if col not in x_new:
+                    x_new[col] = 0  # or another default value
+            x_new = x_new[trained_feature_names]  # Ensure correct order again
+            predictions = class_model.predict(x_new)
+            if predictions == 1:
+                prediction = "Hard"
+            elif predictions == 0:
+                prediction = "Easy"
+            elif predictions == 2:
+                prediction = "Medium"
+            ans = f"the level difficulty is: {prediction}"
+            return ans
+        elif predict == "acceptance":
+            # Load the model
+            accepted_submissions_model, feature_names = joblib.load("accepted_submissions_regression_model.pkl")
+            # Assuming `X_new` is a DataFrame with extra features
+            x_new_filtered = x_new[feature_names]  # Select only the required features
+            predictions = accepted_submissions_model.predict(x_new_filtered)
+            ans = f"the accepted is: {convert_to_string(predictions[0])}"
+            return ans
+        elif predict == "number of likes":
+            # Load the model
+            likes_model, feature_names = joblib.load("likes_random_forest_regression_model.pkl")
+            # Assuming `X_new` is a DataFrame with extra features
+            x_new_filtered = x_new[feature_names]  # Select only the required features
+            predictions = likes_model.predict(x_new_filtered)
+            ans = f"the likes amount is: {convert_to_string(predictions[0])}"
+            return ans
+        elif predict == "number of dislikes":
+            # Load the model
+            dislikes_model, feature_names = joblib.load("dislikes_XGB_regression_model.pkl")
+            # Assuming `x_new` is a DataFrame with extra features
+            x_new_filtered = x_new[feature_names]  # Select only the required features
+            predictions = dislikes_model.predict(x_new_filtered)
+            ans = f"the dislikes amount is: {convert_to_string(predictions[0])}"
+            return ans
+with open("encoding_metadata.json", "r") as f:
+    encoding_metadata = json.load(f)
+the_topics = encoding_metadata["related_topics_columns"]
+the_topics.remove("")
+companies_columns = encoding_metadata["companies_columns"]
+companies_columns.remove("")
+demo = gr.Interface(
+    fn=greet,
+    inputs=[gr.Text(label="Title"), gr.Text(label="Description"),
+            gr.Radio(choices=["Easy", "Medium", "Hard"], label="Difficulty Level"),
+            gr.Dropdown(the_topics, multiselect=True, label="Related Topics",
+                        info="choose all the related topics of this question"),
+            gr.Text(label="Likes Amount"),
+            gr.Text(label="Accepted Amount"),
+            gr.Text(label="Submission Amount"),
+            gr.Text(label="Comments Amount"),
+            gr.Radio(choices=["premium", "not premium"], label="Is Premium"),
+            gr.Radio(choices=["acceptance", "difficulty level", "number of likes", "number of dislikes",
+                              "related topics"], label="Please Predict..")
+            ],
+    outputs=[gr.Text(label="The Prediction")],
+    title="LEETCODE PREDICTOR",
+    description="please go to the leetcode website (https://leetcode.com/) choose a question and copy the question's detiles to the relevant spaces, then choose what you whould like to predict and submit. the prediction result will appear on the right side of the screen 😉"
+)
+demo.launch()

level_classifier_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a7dab2b693d942ad00cab4ab8654d64ac24518de1bc8482efd318e0143d35e5
+size 8753130

likes_random_forest_regression_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3eb18f7bc8d6f75d4ef5477d9459ac76eacb09930e3a1222a0c22d1ec67d2ffe
+size 9841053

related_topics_label_binarizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e79aeb74cb00981f72fe408084d83bed5195a2c21533abac001de5c789b3d96
+size 2091

related_topics_vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec6f8115762b9092ece70cfd7d0900fec7da080b8deba5f0b13565e4573acac1
+size 185124

requirements.txt ADDED Viewed

Binary file (8.16 kB). View file

tfidf_vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a93e2bc402f747446b3054913db329ca4206c83e547113f6dc222e9b4d91c11
+size 4008

title_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7625a0bc0320b6a0ccb392dfbe631d9a2da81dbb25cb3fcc590e98bdb1fbfc0
+size 67440