Spaces:

ThomasSimonini
/

Check-my-progress-Deep-RL-Course

Running

File size: 5,316 Bytes

import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load

import pandas as pd

# Module-level Hugging Face Hub API client.
api = HfApi()

def get_user_models(hf_username, env_tag, lib_tag):
    """
    Return the ids of a user's Reinforcement Learning models
    that match a given environment and library.

    :param hf_username: User HF username (used as the ``author`` filter)
    :param env_tag: Environment tag the model must carry
    :param lib_tag: Library tag the model must carry
    :return: list with the ``modelId`` of every matching model
    """
    hub_client = HfApi()
    # The generic RL tag is always requested together with the two
    # caller-supplied tags.
    required_tags = ["reinforcement-learning", env_tag, lib_tag]
    matching_models = hub_client.list_models(author=hf_username, filter=required_tags)
    return [model_info.modelId for model_info in matching_models]


def get_metadata(model_id):
    """
    Get model metadata (contains evaluation data)

    Downloads the model's README.md from the Hub and parses its metadata
    with ``metadata_load``.

    :param model_id: Hub repo id of the model
    :return: whatever ``metadata_load`` returns for the README, or None
             when the download fails with a Hub HTTP error
    """
    # Use the Hub client's own HTTP error type: it is raised for failed
    # downloads (e.g. 404, README.md not found) and only needs
    # huggingface_hub, which this module already depends on. The module does
    # not import `requests`, so naming `requests.exceptions.HTTPError` here
    # would itself fail with a NameError as soon as a download failed.
    from huggingface_hub.utils import HfHubHTTPError

    try:
        readme_path = hf_hub_download(model_id, filename="README.md")
    except HfHubHTTPError:
        # 404 README.md not found
        return None
    return metadata_load(readme_path)


def parse_metrics_accuracy(meta):
    """
    Get model results and parse it

    Reads the value of the first metric of the first result of the first
    ``model-index`` entry.

    :param meta: model metadata (mapping parsed from the model card)
    :return: the metric value, or None when the metadata has no
             ``model-index`` or the entry is empty / incomplete
    """
    try:
        return meta["model-index"][0]["results"][0]["metrics"][0]["value"]
    except (KeyError, IndexError, TypeError):
        # Missing key, empty list, or a None/non-container level: one
        # incomplete model card must not abort the whole progress check,
        # so report "no result" instead.
        return None


def parse_rewards(accuracy):
    """
    Parse mean_reward and std_reward

    Accepts either ``"<mean> +/- <std>"`` or a bare number (std is then 0).

    :param accuracy: model results (string, number or None)
    :return: tuple ``(mean_reward, std_reward)`` of floats; both are -1000.0
             when ``accuracy`` is None or is not numeric
    """
    default_reward = -1000.0
    default_std = -1000.0

    if accuracy is None:
        return default_reward, default_std

    parsed = str(accuracy).split(' +/- ')
    try:
        mean_reward = float(parsed[0])
        # Only a mean reward was published: no deviation to subtract.
        std_reward = float(parsed[1]) if len(parsed) > 1 else 0.0
    except ValueError:
        # Non-numeric metric text is treated like a missing result rather
        # than crashing the whole progress check.
        return default_reward, default_std

    return mean_reward, std_reward

def calculate_best_result(user_model_ids):
    """
    Find the best score among a user's models for one unit.

    The score of a model is ``mean_reward - std_reward``. Models without
    metadata are skipped, and only scores strictly above the starting value
    of -100 can be selected.

    :param user_model_ids: RL models of a user
    :return: tuple ``(best_result, best_model_id)``; ``(-100, "")`` when no
             model scores above -100
    """
    top_score = -100
    top_model_id = ""

    for candidate_id in user_model_ids:
        card_meta = get_metadata(candidate_id)
        if card_meta is not None:
            mean, std = parse_rewards(parse_metrics_accuracy(card_meta))
            score = mean - std
            if score > top_score:
                top_score, top_model_id = score, candidate_id

    return top_score, top_model_id

def check_if_passed(model):
    """
    Check if result >= baseline
    to know if you pass

    Writes the verdict into ``model["passed"]`` (always a bool).

    :param model: unit record with "best_result", "min_result" and
                  "best_model_id" keys; updated in place
    """
    # An empty best_model_id means no model was found for the unit. Without
    # this guard the placeholder best_result of -100 would satisfy a baseline
    # of -100 and mark the unit as passed for a user who submitted nothing.
    has_model = bool(model.get("best_model_id"))
    model["passed"] = has_model and model["best_result"] >= model["min_result"]

def test_(hf_username):
    """
    Build the certification progress table for a user.

    For every unit, look up the user's models tagged with the unit's
    environment and library, keep the best one, and record whether it
    reaches the unit's baseline.

    :param hf_username: User HF username
    :return: pandas DataFrame with one row per unit and the columns
             unit, env, library, min_result, best_result, best_model_id, passed
    """
    # (unit, env, library, min_result) for each certification unit.
    baselines = (
        ("Unit 1", "LunarLander-v2", "stable-baselines3", 200),
        ("Bonus Unit 1", "ML-Agents-Huggy", "ml-agents", -100),
        ("Unit 2", "Taxi-v3", "q-learning", 5),
        ("Unit 3", "SpaceInvadersNoFrameskip-v4", "stable-baselines3", 200),
        ("Unit 4", "CartPole-v1", "reinforce", 350),
        ("Unit 4", "Pixelcopter-PLE-v0", "reinforce", 5),
    )

    # Key order here defines the DataFrame column order.
    results_certification = [
        {
            "unit": unit_name,
            "env": env_tag,
            "library": lib_tag,
            "min_result": baseline,
            "best_result": 0,
            "best_model_id": "",
            "passed": False,
        }
        for unit_name, env_tag, lib_tag, baseline in baselines
    ]

    for row in results_certification:
        # Models of the user for this unit's environment and library
        candidate_ids = get_user_models(hf_username, row["env"], row["library"])
        print(candidate_ids)

        # Best score and the model that achieved it
        row["best_result"], row["best_model_id"] = calculate_best_result(candidate_ids)

        # Does the best score reach the baseline?
        check_if_passed(row)

    print(results_certification)

    return pd.DataFrame(results_certification)


# Gradio UI: a username field, an email field, a button, and a results table.
with gr.Blocks() as demo:
    hf_username = gr.Textbox(
        placeholder="ThomasSimonini",
        label="Your Hugging Face Username",
    )
    # Shown in the form, but not passed to the click handler below.
    email = gr.Textbox(
        placeholder="thomas.simonini@huggingface.co",
        label="Your Email (to receive your certificate)",
    )
    check_progress_button = gr.Button(value="Check my progress")
    output = gr.components.Dataframe(
        headers=[
            "Unit",
            "Environment",
            "Library",
            "Baseline",
            "Your best result",
            "Your best model id",
            "Pass?",
        ],
        datatype=[
            "markdown",
            "markdown",
            "markdown",
            "number",
            "number",
            "markdown",
            "bool",
        ],
    )

    # Clicking the button runs test_ on the username and fills the table.
    check_progress_button.click(fn=test_, inputs=hf_username, outputs=output)

demo.launch()