# Hugging Face Space: noa151 / LeetCodePredictions (status: Sleeping)
# File size: 8,053 Bytes

import json
import gradio as gr
import joblib
import pandas as pd
from related_topics_prediction import MultiLabelThresholdOptimizer


def convert_to_float(value):
    """Parse an abbreviated count such as ``"1.5K"`` or ``"2M"`` into a float.

    Args:
        value: A number, or a string holding a number optionally followed by
            a ``K`` (thousands) or ``M`` (millions) suffix. The suffix is
            case-insensitive; surrounding whitespace and thousands
            separators (``,``) are ignored.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the text cannot be parsed as a number.
    """
    # Numeric input has no suffix to strip; the substring test below would
    # otherwise fail with a TypeError on it.
    if isinstance(value, (int, float)):
        return float(value)

    text = str(value).strip().replace(',', '').upper()
    multipliers = {'K': 1_000, 'M': 1_000_000}

    suffix = text[-1:]
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)  # Plain number without a suffix


def convert_to_string(value):
    """Format a count in abbreviated form (``"1.5K"``, ``"2.3M"``).

    Args:
        value: The number to format.

    Returns:
        ``"<x.x>M"`` for values of at least one million, ``"<x.x>K"`` for
        values of at least one thousand, otherwise the value truncated to an
        integer and rendered as a string.
    """
    if value >= 1_000_000:
        return f"{value / 1_000_000:.1f}M"
    if value >= 1_000:
        thousands = f"{value / 1_000:.1f}"
        # Values just below one million round up to "1000.0"; promote them
        # to the next unit instead of emitting "1000.0K".
        if thousands == "1000.0":
            return "1.0M"
        return f"{thousands}K"
    return str(int(value))  # Keep it as an integer if it's below 1,000


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def greet(title, description, difficulty, topics, likes, accepted, submission, comments, is_premium, predict):
    """Build a one-row feature frame from the form inputs and run one predictor.

    Args:
        title: Question title.
        description: Question text; vectorized with a stored TF-IDF vectorizer.
        difficulty: ``"Easy"``, ``"Medium"`` or ``"Hard"``.
        topics: Selected related topics, as a list or a comma-separated
            string. ``None`` is treated as no topics.
        likes: Likes count; may use a ``K``/``M`` suffix.
        accepted: Accepted-submissions count; may use a ``K``/``M`` suffix.
        submission: Total submissions count; may use a ``K``/``M`` suffix.
        comments: Comments count; may use a ``K``/``M`` suffix.
        is_premium: ``"premium"`` or anything else for non-premium.
        predict: Which target to predict: ``"related topics"``,
            ``"difficulty level"``, ``"acceptance"``, ``"number of likes"``
            or ``"number of dislikes"``.

    Returns:
        A human-readable sentence with the prediction, or ``None`` when
        ``predict`` is not one of the supported targets.
    """
    # Parse each count once instead of re-parsing it for every derived feature.
    likes_count = convert_to_float(likes)
    accepted_count = convert_to_float(accepted)
    submission_count = convert_to_float(submission)

    if topics is None:
        topic_list = []
    elif isinstance(topics, str):
        topic_list = [topic.strip() for topic in topics.split(',')]
    else:
        topic_list = list(topics)

    x_new = pd.DataFrame([{
        'id': 1,
        'title': str(title),
        'description': str(description),
        'is_premium': 1 if is_premium == "premium" else 0,
        # Same coding as the label mapping used for the classifier output below.
        'difficulty': 0 if difficulty == "Easy" else 1 if difficulty == "Hard" else 2,
        # Guarded: zero submissions must not crash the request.
        'acceptance_rate': _safe_ratio(accepted_count, submission_count),
        'frequency': 0,
        'discuss_count': convert_to_float(comments),
        'accepted': accepted_count,
        'submissions': submission_count,
        'companies': [""],
        'related_topics': topic_list,
        'likes': likes_count,
        'dislikes': 0,
        # Dislikes are unknown at this point, so the rating uses likes only.
        'rating': _safe_ratio(likes_count, likes_count + 0),
        'asked_by_faang': 0,
        'similar_questions': ""
    }])

    # Multi-hot encoding for companies
    listed_companies = x_new["companies"].iloc[0]
    company_data = {company: 1 if company in listed_companies else 0 for company in companies_columns}
    x_new = pd.concat([x_new, pd.DataFrame([company_data])], axis=1)
    x_new = x_new.drop(columns=["companies"])  # Drop original column

    # Multi-hot encoding for topics
    selected_topics = set(topic_list)
    topic_data = {topic: 1 if topic in selected_topics else 0 for topic in the_topics}
    x_new = pd.concat([x_new, pd.DataFrame([topic_data])], axis=1)
    x_new = x_new.drop(columns=["related_topics"])  # Drop original topics column

    # Label encode 'title'
    # NOTE(review): fit_transform re-fits the loaded encoder on this single
    # title, so the stored mapping is not used. Presumably transform() with a
    # fallback for unseen titles was intended -- confirm against training code.
    title_model = joblib.load("title_encoder.pkl")
    x_new['title'] = title_model.fit_transform(x_new['title'])

    if predict == "related topics":
        vectorizer = joblib.load("related_topics_vectorizer.pkl")
        new_tfidf = vectorizer.transform(x_new["description"])

        best_model_info = joblib.load('best_model_related_topics_info.pkl')
        best_model = joblib.load("best_related_topics_model.pkl")
        optimizer = MultiLabelThresholdOptimizer()
        optimizer.optimal_thresholds[best_model_info['model_name']] = best_model_info['threshold']

        predictions = optimizer.predict(best_model, new_tfidf, best_model_info['model_name'])

        mlb = joblib.load("related_topics_label_binarizer.pkl")
        predictions = mlb.inverse_transform(predictions)

        return f"the related topics are: {', '.join(map(str, predictions[0]))}"

    # Every other target works on the tabular features plus the TF-IDF columns.
    vectorizer = joblib.load("tfidf_vectorizer.pkl")
    new_tfidf = vectorizer.transform(x_new["description"])
    new_tfidf_df = pd.DataFrame(new_tfidf.toarray(), columns=vectorizer.get_feature_names_out())
    x_new = pd.concat([x_new, new_tfidf_df], axis=1)
    x_new = x_new.drop(columns=['description'])

    if predict == "difficulty level":
        # The form has no dislikes field, so estimate it with the dislikes model.
        dislikes_model, feature_names = joblib.load("dislikes_XGB_regression_model.pkl")
        predicted_dislikes = dislikes_model.predict(x_new[feature_names])[0]
        x_new['dislikes'] = predicted_dislikes
        # Real assignment: the previous `x_new['rating']: ...` was only an
        # annotation and never updated the column.
        x_new['rating'] = _safe_ratio(likes_count, likes_count + predicted_dislikes)

        class_model = joblib.load("level_classifier_model.pkl")
        trained_feature_names = class_model.named_steps['standardscaler'].get_feature_names_out()

        # Add columns the classifier expects but this frame lacks *before*
        # selecting, otherwise the selection raises KeyError.
        missing = [col for col in trained_feature_names if col not in x_new.columns]
        if missing:
            x_new = pd.concat(
                [x_new, pd.DataFrame(0, index=x_new.index, columns=missing)],
                axis=1,
            )
        x_new = x_new[trained_feature_names]  # Reorder and drop extra columns

        label = class_model.predict(x_new)[0]
        difficulty_names = {0: "Easy", 1: "Hard", 2: "Medium"}
        # Show the raw label rather than failing on an unexpected class.
        prediction = difficulty_names.get(label, str(label))

        return f"the level difficulty is: {prediction}"

    # Regression targets: model file and the prefix of the answer sentence.
    regression_targets = {
        "acceptance": ("accepted_submissions_regression_model.pkl", "the accepted is: "),
        "number of likes": ("likes_random_forest_regression_model.pkl", "the likes amount is: "),
        "number of dislikes": ("dislikes_XGB_regression_model.pkl", "the dislikes amount is: "),
    }
    if predict in regression_targets:
        model_path, answer_prefix = regression_targets[predict]
        model, feature_names = joblib.load(model_path)
        predictions = model.predict(x_new[feature_names])  # Only the required features
        return f"{answer_prefix}{convert_to_string(predictions[0])}"

    return None


# Load the column names used for the multi-hot company/topic encodings.
with open("encoding_metadata.json", "r", encoding="utf-8") as f:
    encoding_metadata = json.load(f)

# Drop the empty-string entry from each list. Filtering (rather than
# list.remove) does not raise when the entry is absent.
the_topics = [topic for topic in encoding_metadata["related_topics_columns"] if topic != ""]
companies_columns = [company for company in encoding_metadata["companies_columns"] if company != ""]

# Input order must match the positional parameters of `greet`.
demo = gr.Interface(
    fn=greet,
    inputs=[gr.Text(label="Title"), gr.Text(label="Description"),
            gr.Radio(choices=["Easy", "Medium", "Hard"], label="Difficulty Level"),
            gr.Dropdown(the_topics, multiselect=True, label="Related Topics",
                        info="choose all the related topics of this question"),
            gr.Text(label="Likes Amount"),
            gr.Text(label="Accepted Amount"),
            gr.Text(label="Submission Amount"),
            gr.Text(label="Comments Amount"),
            gr.Radio(choices=["premium", "not premium"], label="Is Premium"),
            gr.Radio(choices=["acceptance", "difficulty level", "number of likes", "number of dislikes",
                              "related topics"], label="Please Predict..")
            ],
    outputs=[gr.Text(label="The Prediction")],
    title="LEETCODE PREDICTOR",
    description="please go to the leetcode website (https://leetcode.com/problemset/) choose a question and copy the question's detiles to the relevant spaces, then choose what you whould like to predict and submit. the prediction result will appear on the right side of the screen 😉"
)

demo.launch()