noa151's picture
Upload app.py
27b4cbb verified
import json
import gradio as gr
import joblib
import pandas as pd
from related_topics_prediction import MultiLabelThresholdOptimizer
def convert_to_float(value):
    """Parse a count such as ``"1.5K"``, ``"2M"`` or ``"1,234"`` into a float.

    Args:
        value: A string that may carry a ``K`` (thousands) or ``M`` (millions)
            marker in either case, or a plain ``int``/``float``.

    Returns:
        The numeric value as a ``float``.

    Raises:
        ValueError: If the text left after removing the marker is not a number
            (for example an empty string).
    """
    if isinstance(value, (int, float)):
        return float(value)

    # Normalise what people paste from a web page: padding, thousands
    # separators and lower-case suffixes.
    text = str(value).strip().replace(',', '').upper()
    if 'K' in text:
        return float(text.replace('K', '')) * 1_000
    if 'M' in text:
        return float(text.replace('M', '')) * 1_000_000
    return float(text)  # Plain number with no magnitude marker
def convert_to_string(value):
    """Format a number compactly: ``1500000 -> "1.5M"``, ``2500 -> "2.5K"``.

    Args:
        value: The number to format.

    Returns:
        A string with one decimal and an ``M``/``K`` suffix when the magnitude
        is at least one million / one thousand; otherwise the value truncated
        to an integer.
    """
    magnitude = abs(value)
    sign = '-' if value < 0 else ''

    # A value just below one million would otherwise round up to "1000.0K";
    # promote it to the millions bucket instead.
    if magnitude >= 1_000_000 or round(magnitude / 1_000, 1) >= 1_000:
        return f"{sign}{magnitude / 1_000_000:.1f}M"
    if magnitude >= 1_000:
        return f"{sign}{magnitude / 1_000:.1f}K"
    return str(int(value))  # Keep it as an integer if it's below 1,000
# Display names for the integer classes used to encode difficulty in greet().
_DIFFICULTY_LABELS = {0: "Easy", 1: "Hard", 2: "Medium"}

# Regression targets: UI choice -> (pickled (model, feature_names) pair, answer prefix).
_REGRESSION_TARGETS = {
    "acceptance": ("accepted_submissions_regression_model.pkl", "the accepted is: "),
    "number of likes": ("likes_random_forest_regression_model.pkl", "the likes amount is: "),
    "number of dislikes": ("dislikes_XGB_regression_model.pkl", "the dislikes amount is: "),
}


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def _multi_hot(selected, columns):
    """Return a one-row DataFrame holding 1 for each column found in *selected*, else 0."""
    return pd.DataFrame([{column: 1 if column in selected else 0 for column in columns}])


def _predict_from_saved_pair(model_path, features):
    """Load a pickled ``(model, feature_names)`` pair and predict on those columns only."""
    model, feature_names = joblib.load(model_path)
    return model.predict(features[feature_names])


def greet(title, description, difficulty, topics, likes, accepted, submission, comments, is_premium, predict):
    """Build a one-row feature frame from the form values and run the requested prediction.

    Args:
        title: Question title.
        description: Question text; vectorised with the saved TF-IDF vectorizers.
        difficulty: "Easy", "Hard" or anything else (encoded as 0, 1 and 2).
        topics: Selected topics, either a list or a comma-separated string.
        likes: Likes count, parsed with ``convert_to_float``.
        accepted: Accepted count, parsed with ``convert_to_float``.
        submission: Submission count, parsed with ``convert_to_float``.
        comments: Comments count, parsed with ``convert_to_float``.
        is_premium: "premium" marks the question as premium; any other value does not.
        predict: One of "related topics", "difficulty level", "acceptance",
            "number of likes" or "number of dislikes".

    Returns:
        A human-readable sentence with the prediction, or a short prompt when
        *predict* is not one of the supported choices.

    Raises:
        ValueError: If one of the count fields cannot be parsed as a number.
    """
    known_targets = {"related topics", "difficulty level", *_REGRESSION_TARGETS}
    if predict not in known_targets:
        # Nothing (or something unsupported) was selected: answer right away
        # instead of loading models and silently returning None.
        return "please choose what you would like to predict"

    likes_count = convert_to_float(likes)
    accepted_count = convert_to_float(accepted)
    submission_count = convert_to_float(submission)

    if isinstance(topics, str):
        selected_topics = [topic.strip() for topic in topics.split(',')]
    else:
        selected_topics = list(topics or [])  # None (nothing selected) -> no topics
    selected_companies = [""]  # The form has no company input

    x_new = pd.DataFrame([{
        'id': 1,
        'title': str(title),
        'description': str(description),
        'is_premium': 1 if is_premium == "premium" else 0,
        'difficulty': 0 if difficulty == "Easy" else 1 if difficulty == "Hard" else 2,
        # Guarded: zero submissions would otherwise raise ZeroDivisionError.
        'acceptance_rate': _safe_ratio(accepted_count, submission_count),
        'frequency': 0,
        'discuss_count': convert_to_float(comments),
        'accepted': accepted_count,
        'submissions': submission_count,
        'likes': likes_count,
        'dislikes': 0,
        # Dislikes are not part of the form, so they count as 0 here; the
        # difficulty branch below replaces this with a predicted value.
        'rating': _safe_ratio(likes_count, likes_count),
        'asked_by_faang': 0,
        'similar_questions': ""
    }])

    # Multi-hot encode companies and topics, appended in that order.
    x_new = pd.concat(
        [
            x_new,
            _multi_hot(selected_companies, companies_columns),
            _multi_hot(selected_topics, the_topics),
        ],
        axis=1,
    )

    # Label encode 'title'
    title_model = joblib.load("title_encoder.pkl")
    # NOTE(review): fit_transform re-fits the loaded encoder on this single
    # title, so whatever the saved encoder learned is discarded. Presumably
    # done because transform() would reject unseen titles -- confirm intent.
    x_new['title'] = title_model.fit_transform(x_new['title'])

    if predict == "related topics":
        vectorizer = joblib.load("related_topics_vectorizer.pkl")
        new_tfidf = vectorizer.transform(x_new["description"])
        best_model_info = joblib.load('best_model_related_topics_info.pkl')
        best_model = joblib.load("best_related_topics_model.pkl")
        optimizer = MultiLabelThresholdOptimizer()
        optimizer.optimal_thresholds[best_model_info['model_name']] = best_model_info['threshold']
        predictions = optimizer.predict(best_model, new_tfidf, best_model_info['model_name'])
        mlb = joblib.load("related_topics_label_binarizer.pkl")
        predictions = mlb.inverse_transform(predictions)
        return f"the related topics are: {', '.join(map(str, predictions[0]))}"

    # Every other target works on the tabular features plus TF-IDF columns.
    vectorizer = joblib.load("tfidf_vectorizer.pkl")
    new_tfidf = vectorizer.transform(x_new["description"])
    new_tfidf_df = pd.DataFrame(new_tfidf.toarray(), columns=vectorizer.get_feature_names_out())
    x_new = pd.concat([x_new, new_tfidf_df], axis=1)
    x_new = x_new.drop(columns=['description'])

    if predict == "difficulty level":
        # There is no dislike input, so estimate it with the dislikes model.
        dislike = _predict_from_saved_pair("dislikes_XGB_regression_model.pkl", x_new)
        x_new['dislikes'] = dislike[0]
        # Was written as an annotation (`x_new['rating']: ...`), which assigns nothing.
        x_new['rating'] = _safe_ratio(likes_count, likes_count + dislike[0])

        class_model = joblib.load("level_classifier_model.pkl")
        # Get feature names from trained model
        trained_feature_names = class_model.named_steps['standardscaler'].get_feature_names_out()

        # Add any column the model expects but this row lacks (filled with 0)
        # BEFORE selecting, otherwise the selection itself raises KeyError.
        missing = [col for col in trained_feature_names if col not in x_new.columns]
        if missing:
            padding = pd.DataFrame(0, index=x_new.index, columns=missing)
            x_new = pd.concat([x_new, padding], axis=1)
        x_new = x_new[trained_feature_names]  # Reorder and remove extra columns

        predicted_class = class_model.predict(x_new)[0]
        # Fall back to the raw class so an unexpected value cannot leave the label unbound.
        prediction = _DIFFICULTY_LABELS.get(int(predicted_class), str(predicted_class))
        return f"the level difficulty is: {prediction}"

    model_path, answer_prefix = _REGRESSION_TARGETS[predict]
    predictions = _predict_from_saved_pair(model_path, x_new)
    return f"{answer_prefix}{convert_to_string(predictions[0])}"
# Load the column-name lists used for the multi-hot encoding of topics and companies.
with open("encoding_metadata.json", "r", encoding="utf-8") as f:
    encoding_metadata = json.load(f)

# Drop the empty-string placeholder entries. Filtering (unlike list.remove(""))
# does not raise ValueError when the placeholder is absent and removes every
# occurrence; slice assignment keeps the lists inside encoding_metadata in sync.
the_topics = encoding_metadata["related_topics_columns"]
the_topics[:] = [topic for topic in the_topics if topic != ""]
companies_columns = encoding_metadata["companies_columns"]
companies_columns[:] = [company for company in companies_columns if company != ""]
# Build the Gradio form. The input components are listed in the same order as
# the positional parameters of greet().
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Text(label="Title"),
        gr.Text(label="Description"),
        gr.Radio(choices=["Easy", "Medium", "Hard"], label="Difficulty Level"),
        gr.Dropdown(the_topics, multiselect=True, label="Related Topics",
                    info="choose all the related topics of this question"),
        gr.Text(label="Likes Amount"),
        gr.Text(label="Accepted Amount"),
        gr.Text(label="Submission Amount"),
        gr.Text(label="Comments Amount"),
        gr.Radio(choices=["premium", "not premium"], label="Is Premium"),
        gr.Radio(choices=["acceptance", "difficulty level", "number of likes", "number of dislikes",
                          "related topics"], label="Please Predict.."),
    ],
    outputs=[gr.Text(label="The Prediction")],
    title="LEETCODE PREDICTOR",
    # Text repaired: the trailing emoji was mis-encoded and two words were misspelled.
    description="please go to the leetcode website (https://leetcode.com/problemset/) choose a question and copy the question's details to the relevant spaces, then choose what you would like to predict and submit. the prediction result will appear on the right side of the screen 😉"
)

# Start the web server for the interface.
demo.launch()