import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load
from huggingface_hub.utils import HfHubHTTPError
import pandas as pd

from utils import *

# Shared Hub client, reused by every query in this module.
api = HfApi()

# Fallback values used when a model card carries no parsable evaluation
# result. Both are equal on purpose: mean - std then evaluates to 0.
_DEFAULT_MEAN_REWARD = -1000
_DEFAULT_STD_REWARD = -1000


def get_user_models(hf_username, env_tag, lib_tag):
    """
    List the Reinforcement Learning models from user given environment and lib
    :param hf_username: User HF username
    :param env_tag: Environment tag
    :param lib_tag: Library tag
    :return: list of model ids matching the author and the three tags
    """
    models = api.list_models(
        author=hf_username,
        filter=["reinforcement-learning", env_tag, lib_tag],
    )
    return [model.modelId for model in models]


def get_metadata(model_id):
    """
    Get model metadata (contains evaluation data)
    :param model_id: id of the model repository on the Hub
    :return: the value returned by ``metadata_load`` for the README, or None
        when the README could not be downloaded
    """
    try:
        readme_path = hf_hub_download(model_id, filename="README.md")
    except HfHubHTTPError:
        # Typically a 404: README.md not found. The Hub-specific error class
        # is used because ``requests`` is not imported in this module.
        return None
    return metadata_load(readme_path)


def parse_metrics_accuracy(meta):
    """
    Get model results and parse it
    :param meta: model metadata
    :return: value of the first metric of the first result of the first
        model-index entry, or None when that path does not exist
    """
    if "model-index" not in meta:
        return None
    try:
        first_result = meta["model-index"][0]["results"][0]
        return first_result["metrics"][0]["value"]
    except (KeyError, IndexError, TypeError):
        # Malformed or empty model-index: treat it like a missing result
        # instead of aborting the whole progress check.
        return None


def parse_rewards(accuracy):
    """
    Parse mean_reward and std_reward
    :param accuracy: model results, expected as "<mean> +/- <std>" or "<mean>"
    :return: (mean_reward, std_reward) as floats; the default pair when
        accuracy is None or is not numeric
    """
    defaults = (float(_DEFAULT_MEAN_REWARD), float(_DEFAULT_STD_REWARD))
    if accuracy is None:
        return defaults

    parts = str(accuracy).split(" +/- ")
    try:
        mean_reward = float(parts[0])
        # Only a mean was reported: the std is taken as 0.
        std_reward = float(parts[1]) if len(parts) > 1 else 0.0
    except ValueError:
        return defaults
    return mean_reward, std_reward


def calculate_best_result(user_model_ids):
    """
    Calculate the best results of a unit
    best_result = mean_reward - std_reward
    :param user_model_ids: RL models of a user
    :return: (best_result, best_model_id); (-100, "") when no model has
        readable metadata and a result strictly above -100
    """
    best_result = -100
    best_model_id = ""

    for model_id in user_model_ids:
        meta = get_metadata(model_id)
        if meta is None:
            continue
        mean_reward, std_reward = parse_rewards(parse_metrics_accuracy(meta))
        result = mean_reward - std_reward
        if result > best_result:
            best_result = result
            best_model_id = model_id

    return best_result, best_model_id


def check_if_passed(model):
    """
    Check if result >= baseline to know if you pass
    :param model: unit entry; its "passed" key is set to True in place when
        "best_result" reaches "min_result" (it is never reset to False)
    """
    if model["best_result"] >= model["min_result"]:
        model["passed"] = True


def _default_results():
    """
    Build a fresh certification table with one entry per assignment.
    A new list is returned on every call so callers can mutate it freely.
    """
    assignments = [
        ("Unit 1", "LunarLander-v2", "stable-baselines3", 200),
        ("Unit 2", "Taxi-v3", "q-learning", 4),
        ("Unit 3", "SpaceInvadersNoFrameskip-v4", "stable-baselines3", 200),
        ("Unit 4", "CartPole-v1", "reinforce", 350),
        ("Unit 4", "Pixelcopter-PLE-v0", "reinforce", 5),
        ("Unit 5", "ML-Agents-SnowballTarget", "ml-agents", -100),
        ("Unit 5", "ML-Agents-Pyramids", "ml-agents", -100),
        ("Unit 6", "AntBulletEnv-v0", "stable-baselines3", 650),
        ("Unit 6", "PandaReachDense-v2", "stable-baselines3", -3.5),
        ("Unit 7", "ML-Agents-SoccerTwos", "ml-agents", -100),
        ("Unit 8 Part 1", "GodotRL-JumperHard", "cleanrl", -100),
        ("Unit 8 Part 2", "Vizdoom-Battle", "cleanrl", -100),
    ]
    return [
        {
            "unit": unit,
            "env": env,
            "library": library,
            "min_result": min_result,
            "best_result": 0,
            "best_model_id": "",
            "passed": False,
        }
        for unit, env, library, min_result in assignments
    ]


def test_(hf_username):
    """
    Compute the certification table of a user
    :param hf_username: User HF username
    :return: pandas DataFrame with one row per assignment
    """
    results_certification = _default_results()

    hf_username = hf_username.strip() if isinstance(hf_username, str) else hf_username
    if not hf_username:
        # Without an author the Hub query would not be restricted to one
        # user, so return the untouched table instead.
        return pd.DataFrame(results_certification)

    for unit in results_certification:
        # Get user model
        user_models = get_user_models(hf_username, unit["env"], unit["library"])
        print(user_models)

        # Calculate the best result and get the best_model_id
        best_result, best_model_id = calculate_best_result(user_models)

        # Save best_result and best_model_id
        unit["best_result"] = best_result
        unit["best_model_id"] = make_clickable_model(best_model_id)

        # Based on best_result do we pass the unit? Only when a model was
        # actually found: the -100 placeholder result would otherwise satisfy
        # the "min_result = -100" units without any pushed model.
        if best_model_id:
            check_if_passed(unit)

    print(results_certification)

    return pd.DataFrame(results_certification)


with gr.Blocks() as demo:
    gr.Markdown("""
    # 🏆 Check your progress in the Deep Reinforcement Learning Course 🏆
    You can check your progress here.

    - To get a certificate of completion, you must **pass 80% of the assignments before the end of April 2023**.
    - To get an honors certificate, you must **pass 100% of the assignments before the end of April 2023**.

    To pass an assignment your model result (mean_reward - std_reward) must be >= min_result

    **When min_result = -100 it means that you just need to push a model to pass this hands-on. No need to reach a certain result.**

    Just type your Hugging Face Username 🤗 (in my case ThomasSimonini)
    """)

    hf_username = gr.Textbox(placeholder="ThomasSimonini", label="Your Hugging Face Username")
    check_progress_button = gr.Button(value="Check my progress")
    # The initial table is built from the defaults: at this point hf_username
    # is the Textbox component, not a username string, so it must not be
    # passed to test_.
    output = gr.components.Dataframe(
        value=pd.DataFrame(_default_results()),
        headers=["Unit", "Environment", "Library", "Baseline", "Your best result", "Your best model id", "Pass?"],
        datatype=["markdown", "markdown", "markdown", "number", "number", "markdown", "bool"],
    )
    check_progress_button.click(fn=test_, inputs=hf_username, outputs=output)

demo.launch()