"""Gradio app that predicts attributes of a LeetCode question from saved models."""
import json

import gradio as gr
import joblib
import pandas as pd

from related_topics_prediction import MultiLabelThresholdOptimizer

# Multipliers for the abbreviated counts accepted by convert_to_float.
_SUFFIX_FACTORS = {'K': 1_000, 'M': 1_000_000}

# Numeric codes used for the 'difficulty' feature; anything else maps to 2.
_DIFFICULTY_CODES = {"Easy": 0, "Hard": 1}

# Reverse mapping used to render the classifier's predicted label.
_DIFFICULTY_NAMES = {0: "Easy", 1: "Hard", 2: "Medium"}


def convert_to_float(value):
    """Convert a count such as ``"1.2K"``, ``"3M"``, ``"1,234"`` or ``42`` to a float.

    Args:
        value: A number, or a string optionally carrying a ``K`` (thousand)
            or ``M`` (million) marker. The marker is case-insensitive;
            surrounding whitespace and thousands separators are ignored.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the text cannot be parsed as a number.
    """
    if isinstance(value, (int, float)):
        return float(value)  # already a number
    text = str(value).strip().replace(',', '').upper()
    for suffix, factor in _SUFFIX_FACTORS.items():
        if suffix in text:
            return float(text.replace(suffix, '')) * factor
    return float(text)


def convert_to_string(value):
    """Format a number compactly: ``1500`` -> ``"1.5K"``, ``2_000_000`` -> ``"2.0M"``.

    Values below 1,000 are truncated to an integer and returned as text.
    """
    if value >= 1_000_000:
        return f"{value / 1_000_000:.1f}M"
    if value >= 1_000:
        return f"{value / 1_000:.1f}K"
    return str(int(value))


def _parse_count(value):
    """Parse a numeric form field, treating an unset or blank field as 0."""
    if value is None or (isinstance(value, str) and not value.strip()):
        return 0.0
    return convert_to_float(value)


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _multi_hot(selected, vocabulary):
    """Build a ``{name: 0/1}`` mapping marking which vocabulary entries are selected."""
    chosen = set(selected)
    return {name: 1 if name in chosen else 0 for name in vocabulary}


def _predict_from_bundle(path, features):
    """Load a ``(model, feature_names)`` pair saved at *path* and predict on those columns."""
    model, feature_names = joblib.load(path)
    return model.predict(features[feature_names])


def _predict_related_topics(descriptions):
    """Predict the topic labels for the given description column and format the answer."""
    vectorizer = joblib.load("related_topics_vectorizer.pkl")
    new_tfidf = vectorizer.transform(descriptions)
    best_model_info = joblib.load('best_model_related_topics_info.pkl')
    best_model = joblib.load("best_related_topics_model.pkl")
    model_name = best_model_info['model_name']
    optimizer = MultiLabelThresholdOptimizer()
    optimizer.optimal_thresholds[model_name] = best_model_info['threshold']
    predictions = optimizer.predict(best_model, new_tfidf, model_name)
    mlb = joblib.load("related_topics_label_binarizer.pkl")
    predictions = mlb.inverse_transform(predictions)
    return f"the related topics are: {', '.join(map(str, predictions[0]))}"


def greet(title, description, difficulty, topics, likes, accepted, submission, comments, is_premium, predict):
    """Build a one-row feature frame from the form inputs and run the requested prediction.

    Args:
        title: Question title.
        description: Question description text (vectorized with a saved TF-IDF model).
        difficulty: ``"Easy"``, ``"Medium"`` or ``"Hard"``.
        topics: Selected topics, as a list or a comma-separated string.
        likes: Likes count; may use ``K``/``M`` abbreviations or be blank.
        accepted: Accepted-submissions count (same format as ``likes``).
        submission: Total-submissions count (same format as ``likes``).
        comments: Discussion/comment count (same format as ``likes``).
        is_premium: ``"premium"`` or ``"not premium"``.
        predict: Which target to predict: ``"acceptance"``, ``"difficulty level"``,
            ``"number of likes"``, ``"number of dislikes"`` or ``"related topics"``.

    Returns:
        A human-readable string with the prediction.
    """
    # Blank fields are expected (e.g. "Likes Amount" when predicting likes),
    # so they are parsed as 0 instead of crashing in float("").
    likes_count = _parse_count(likes)
    accepted_count = _parse_count(accepted)
    submission_count = _parse_count(submission)

    if isinstance(topics, str):
        selected_topics = [topic.strip() for topic in topics.split(',')]
    else:
        selected_topics = list(topics or [])

    x_new = pd.DataFrame([{
        'id': 1,
        'title': str(title),
        'description': str(description),
        'is_premium': 1 if is_premium == "premium" else 0,
        'difficulty': _DIFFICULTY_CODES.get(difficulty, 2),
        'acceptance_rate': _safe_ratio(accepted_count, submission_count),
        'frequency': 0,
        'discuss_count': _parse_count(comments),
        'accepted': accepted_count,
        'submissions': submission_count,
        'likes': likes_count,
        'dislikes': 0,
        # Dislikes are not a form input, so the rating starts from likes alone.
        'rating': _safe_ratio(likes_count, likes_count + 0),
        'asked_by_faang': 0,
        'similar_questions': "",
    }])

    # Multi-hot encode companies (the form has no company input, so the
    # selection is the single empty entry) and the selected topics.
    company_data = _multi_hot([""], companies_columns)
    topic_data = _multi_hot(selected_topics, the_topics)
    x_new = pd.concat(
        [x_new, pd.DataFrame([company_data]), pd.DataFrame([topic_data])],
        axis=1,
    )

    # Label encode 'title'.
    # NOTE(review): fit_transform re-fits the loaded encoder on this single
    # title instead of reusing its saved fit; kept as-is because a plain
    # transform may reject titles the encoder has not seen - confirm intent.
    title_model = joblib.load("title_encoder.pkl")
    x_new['title'] = title_model.fit_transform(x_new['title'])

    if predict == "related topics":
        return _predict_related_topics(x_new["description"])

    # Every other target uses the description's TF-IDF features as columns.
    vectorizer = joblib.load("tfidf_vectorizer.pkl")
    new_tfidf = vectorizer.transform(x_new["description"])
    new_tfidf_df = pd.DataFrame(new_tfidf.toarray(), columns=vectorizer.get_feature_names_out())
    x_new = pd.concat([x_new, new_tfidf_df], axis=1)
    x_new = x_new.drop(columns=['description'])

    if predict == "difficulty level":
        # There is no dislikes input, so estimate it with the dislikes model
        # and recompute the rating from it.
        dislike = _predict_from_bundle("dislikes_XGB_regression_model.pkl", x_new)
        x_new['dislikes'] = dislike[0]
        x_new['rating'] = _safe_ratio(likes_count, likes_count + dislike[0])

        class_model = joblib.load("level_classifier_model.pkl")
        trained_feature_names = class_model.named_steps['standardscaler'].get_feature_names_out()

        # Add any feature the model expects but the frame lacks (filled with 0)
        # BEFORE selecting, otherwise the selection raises KeyError.
        missing = list(dict.fromkeys(
            col for col in trained_feature_names if col not in x_new.columns
        ))
        if missing:
            filler = pd.DataFrame(0, index=x_new.index, columns=missing)
            x_new = pd.concat([x_new, filler], axis=1)
        x_new = x_new[trained_feature_names]  # keep only the trained features, in order

        label = class_model.predict(x_new)[0]
        prediction = _DIFFICULTY_NAMES.get(label, str(label))
        return f"the level difficulty is: {prediction}"

    if predict == "acceptance":
        predictions = _predict_from_bundle("accepted_submissions_regression_model.pkl", x_new)
        return f"the accepted is: {convert_to_string(predictions[0])}"

    if predict == "number of likes":
        predictions = _predict_from_bundle("likes_random_forest_regression_model.pkl", x_new)
        return f"the likes amount is: {convert_to_string(predictions[0])}"

    if predict == "number of dislikes":
        predictions = _predict_from_bundle("dislikes_XGB_regression_model.pkl", x_new)
        return f"the dislikes amount is: {convert_to_string(predictions[0])}"

    # No (or an unknown) prediction target was selected.
    return "please choose what you would like to predict"


with open("encoding_metadata.json", "r", encoding="utf-8") as f:
    encoding_metadata = json.load(f)

# Drop the empty placeholder entries; filtering (rather than list.remove)
# does not fail when the metadata contains no empty entry.
the_topics = [topic for topic in encoding_metadata["related_topics_columns"] if topic != ""]
companies_columns = [company for company in encoding_metadata["companies_columns"] if company != ""]

demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Text(label="Title"),
        gr.Text(label="Description"),
        gr.Radio(choices=["Easy", "Medium", "Hard"], label="Difficulty Level"),
        gr.Dropdown(the_topics, multiselect=True, label="Related Topics",
                    info="choose all the related topics of this question"),
        gr.Text(label="Likes Amount"),
        gr.Text(label="Accepted Amount"),
        gr.Text(label="Submission Amount"),
        gr.Text(label="Comments Amount"),
        gr.Radio(choices=["premium", "not premium"], label="Is Premium"),
        gr.Radio(choices=["acceptance", "difficulty level", "number of likes",
                          "number of dislikes", "related topics"],
                 label="Please Predict.."),
    ],
    outputs=[gr.Text(label="The Prediction")],
    title="LEETCODE PREDICTOR",
    description="please go to the leetcode website (https://leetcode.com/problemset/) choose a question and copy the question's detiles to the relevant spaces, then choose what you whould like to predict and submit. the prediction result will appear on the right side of the screen 😉"
)

demo.launch()