cyrodw committed
Commit 28f359e · 1 Parent(s): f4fd5ac

Upload folder using huggingface_hub

.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
.idea/PPO.iml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$" />
+     <orderEntry type="inheritedJdk" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+ </module>
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,12 @@
+ <component name="InspectionProjectProfileManager">
+   <profile version="1.0">
+     <option name="myName" value="Project Default" />
+     <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+       <option name="ignoredErrors">
+         <list>
+           <option value="N802" />
+         </list>
+       </option>
+     </inspection_tool>
+   </profile>
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectRootManager" version="2" project-jdk-name="py39" project-jdk-type="Python SDK" />
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/PPO.iml" filepath="$PROJECT_DIR$/.idea/PPO.iml" />
+     </modules>
+   </component>
+ </project>
.idea/workspace.xml ADDED
@@ -0,0 +1,61 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="AutoImportSettings">
+     <option name="autoReloadType" value="SELECTIVE" />
+   </component>
+   <component name="ChangeListManager">
+     <list default="true" id="82c11718-8476-4ed5-8f5a-34544b12ac29" name="Changes" comment="" />
+     <option name="SHOW_DIALOG" value="false" />
+     <option name="HIGHLIGHT_CONFLICTS" value="true" />
+     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+     <option name="LAST_RESOLUTION" value="IGNORE" />
+   </component>
+   <component name="MarkdownSettingsMigration">
+     <option name="stateVersion" value="1" />
+   </component>
+   <component name="ProjectId" id="2Poxmc2HHyhQ85i4TLtzuLFsOBK" />
+   <component name="ProjectViewState">
+     <option name="hideEmptyMiddlePackages" value="true" />
+     <option name="showLibraryContents" value="true" />
+   </component>
+   <component name="PropertiesComponent">{
+   &quot;keyToString&quot;: {
+     &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
+     &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;
+   }
+ }</component>
+   <component name="RunManager">
+     <configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
+       <module name="PPO" />
+       <option name="INTERPRETER_OPTIONS" value="" />
+       <option name="PARENT_ENVS" value="true" />
+       <envs>
+         <env name="PYTHONUNBUFFERED" value="1" />
+       </envs>
+       <option name="SDK_HOME" value="" />
+       <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+       <option name="IS_MODULE_SDK" value="true" />
+       <option name="ADD_CONTENT_ROOTS" value="true" />
+       <option name="ADD_SOURCE_ROOTS" value="true" />
+       <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
+       <option name="PARAMETERS" value="" />
+       <option name="SHOW_COMMAND_LINE" value="false" />
+       <option name="EMULATE_TERMINAL" value="false" />
+       <option name="MODULE_MODE" value="false" />
+       <option name="REDIRECT_INPUT" value="false" />
+       <option name="INPUT_FILE" value="" />
+       <method v="2" />
+     </configuration>
+   </component>
+   <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
+   <component name="TaskManager">
+     <task active="true" id="Default" summary="Default task">
+       <changelist id="82c11718-8476-4ed5-8f5a-34544b12ac29" name="Changes" comment="" />
+       <created>1684137395396</created>
+       <option name="number" value="Default" />
+       <option name="presentableId" value="Default" />
+       <updated>1684137395396</updated>
+     </task>
+     <servers />
+   </component>
+ </project>
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
        type: Pixelcopter-PLE-v0
      metrics:
      - type: mean_reward
-       value: 9.20 +/- 9.04
+       value: 0.00 +/- 0.00
        name: mean_reward
        verified: false
  ---
hyperparameters.json CHANGED
@@ -1 +1 @@
- {"h_size": 64, "n_training_episodes": 50000, "n_evaluation_episodes": 10, "max_t": 10000, "gamma": 0.99, "lr": 0.0001, "env_id": "Pixelcopter-PLE-v0", "state_space": 7, "action_space": 2}
+ {"h_size": 64, "n_training_episodes": 1000, "n_evaluation_episodes": 10, "max_t": 10000, "gamma": 0.99, "lr": 0.0001, "env_id": "Pixelcopter-PLE-v0", "state_space": 7, "action_space": 2}
main.py ADDED
@@ -0,0 +1,265 @@
+ import json
+ import datetime
+ import tempfile
+
+ import numpy as np
+
+ from collections import deque
+
+ # PyTorch
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import torch.optim as optim
+ from huggingface_hub import metadata_eval_result, HfApi, metadata_save
+ from torch.distributions import Categorical
+
+ # Gym
+ import gym
+ import gym_pygame
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ print(device)
+
+ env_id = "Pixelcopter-PLE-v0"
+ env = gym.make(env_id)
+ eval_env = gym.make(env_id)
+ s_size = env.observation_space.shape[0]
+ a_size = env.action_space.n
+
+
+ class Policy(nn.Module):
+     def __init__(self, s_size, a_size, h_size):
+         super(Policy, self).__init__()
+         self.fc1 = nn.Linear(s_size, h_size)
+         self.fc2 = nn.Linear(h_size, h_size * 2)
+         self.fc3 = nn.Linear(h_size * 2, a_size)
+
+     def forward(self, x):
+         x = F.relu(self.fc1(x))
+         x = F.relu(self.fc2(x))
+         x = self.fc3(x)
+         return F.softmax(x, dim=1)
+
+     def act(self, state):
+         state = torch.from_numpy(state).float().unsqueeze(0).to(device)
+         probs = self.forward(state).cpu()
+         m = Categorical(probs)
+         action = m.sample()
+         return action.item(), m.log_prob(action)
+
+
+ def reinforce(policy, optimizer, n_training_episodes, max_t, gamma, print_every):
+     # Helps us calculate the score during training
+     scores_deque = deque(maxlen=100)
+     scores = []
+     # Line 3 of pseudocode
+     for i_episode in range(1, n_training_episodes + 1):
+         saved_log_probs = []
+         rewards = []
+         state = env.reset()
+         # Line 4 of pseudocode
+         for t in range(max_t):
+             action, log_prob = policy.act(state)
+             saved_log_probs.append(log_prob)
+             state, reward, done, _ = env.step(action)
+             rewards.append(reward)
+             if done:
+                 break
+         scores_deque.append(sum(rewards))
+         scores.append(sum(rewards))
+
+         # Line 6 of pseudocode: calculate the return
+         returns = deque(maxlen=max_t)
+         n_steps = len(rewards)
+         # Compute the discounted returns at each timestep,
+         # as
+         #     the sum of the gamma-discounted return at time t (G_t) + the reward at time t
+         #
+         # In O(N) time, where N is the number of time steps
+         # (this definition of the discounted return G_t follows the definition of this quantity
+         # shown at page 44 of Sutton&Barto 2017 2nd draft)
+         # G_t = r_(t+1) + r_(t+2) + ...
+
+         # Given this formulation, the returns at each timestep t can be computed
+         # by re-using the computed future returns G_(t+1) to compute the current return G_t
+         # G_t = r_(t+1) + gamma*G_(t+1)
+         # G_(t-1) = r_t + gamma* G_t
+         # (this follows a dynamic programming approach, with which we memorize solutions in order
+         # to avoid computing them multiple times)
+
+         # This is correct since the above is equivalent to (see also page 46 of Sutton&Barto 2017 2nd draft)
+         # G_(t-1) = r_t + gamma*r_(t+1) + gamma*gamma*r_(t+2) + ...
+
+         ## Given the above, we calculate the returns at timestep t as:
+         #               gamma[t] * return[t] + reward[t]
+         #
+         ## We compute this starting from the last timestep to the first, in order
+         ## to employ the formula presented above and avoid redundant computations that would be needed
+         ## if we were to do it from first to last.
+
+         ## Hence, the queue "returns" will hold the returns in chronological order, from t=0 to t=n_steps
+         ## thanks to the appendleft() function, which lets us append at position 0 in constant time O(1);
+         ## a normal Python list would instead require O(N) to do this.
+         for t in range(n_steps)[::-1]:
+             disc_return_t = (returns[0] if len(returns) > 0 else 0)
+             returns.appendleft(gamma * disc_return_t + rewards[t])
+
+         ## Standardization of the returns is employed to make training more stable
+         eps = np.finfo(np.float32).eps.item()
+         ## eps is the smallest representable float, which is
+         # added to the standard deviation of the returns to avoid numerical instabilities
+         returns = torch.tensor(returns)
+         returns = (returns - returns.mean()) / (returns.std() + eps)
+
+         # Line 7:
+         policy_loss = []
+         for log_prob, disc_return in zip(saved_log_probs, returns):
+             policy_loss.append(-log_prob * disc_return)
+         policy_loss = torch.cat(policy_loss).sum()
+
+         # Line 8: PyTorch prefers gradient descent
+         optimizer.zero_grad()
+         policy_loss.backward()
+         optimizer.step()
+
+         if i_episode % print_every == 0:
+             print('Episode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
+
+     return scores
+
+
+ pixelcopter_hyperparameters = {
+     "h_size": 64,
+     "n_training_episodes": 1000,
+     "n_evaluation_episodes": 10,
+     "max_t": 10000,
+     "gamma": 0.99,
+     "lr": 1e-4,
+     "env_id": env_id,
+     "state_space": s_size,
+     "action_space": a_size,
+ }
+
+ # Create the policy and place it on the device
+ # torch.manual_seed(50)
+ pixelcopter_policy = Policy(pixelcopter_hyperparameters["state_space"], pixelcopter_hyperparameters["action_space"],
+                             pixelcopter_hyperparameters["h_size"]).to(device)
+ pixelcopter_optimizer = optim.Adam(pixelcopter_policy.parameters(), lr=pixelcopter_hyperparameters["lr"])
+
+ scores = reinforce(pixelcopter_policy,
+                    pixelcopter_optimizer,
+                    pixelcopter_hyperparameters["n_training_episodes"],
+                    pixelcopter_hyperparameters["max_t"],
+                    pixelcopter_hyperparameters["gamma"],
+                    1000)
+
+
+ def push_to_hub(repo_id,
+                 model,
+                 hyperparameters,
+                 ):
+     """
+     Evaluate, generate a video and upload a model to the Hugging Face Hub.
+     This method does the complete pipeline:
+     - It evaluates the model
+     - It generates the model card
+     - It generates a replay video of the agent
+     - It pushes everything to the Hub
+
+     :param repo_id: id of the model repository on the Hugging Face Hub
+     :param model: the PyTorch model we want to save
+     :param hyperparameters: training hyperparameters
+     :param eval_env: evaluation environment
+     :param video_fps: how many frames per second to record in our video replay
+     """
+
+     _, repo_name = repo_id.split("/")
+     api = HfApi()
+
+     # Step 1: Create the repo
+     repo_url = api.create_repo(
+         repo_id=repo_id,
+         exist_ok=True,
+     )
+
+     # Step 2: Save the model
+     torch.save(model, "model.pt")
+
+     # Step 3: Save the hyperparameters to JSON
+     with open("hyperparameters.json", "w") as outfile:
+         json.dump(hyperparameters, outfile)
+
+     # Step 4: Evaluate the model and build JSON
+     mean_reward, std_reward = 0, 0
+     # Get datetime
+     eval_datetime = datetime.datetime.now()
+     eval_form_datetime = eval_datetime.isoformat()
+
+     evaluate_data = {
+         "env_id": hyperparameters["env_id"],
+         "mean_reward": mean_reward,
+         "n_evaluation_episodes": hyperparameters["n_evaluation_episodes"],
+         "eval_datetime": eval_form_datetime,
+     }
+
+     # Write a JSON file
+     with open("results.json", "w") as outfile:
+         json.dump(evaluate_data, outfile)
+
+     # Step 5: Create the model card
+     env_name = hyperparameters["env_id"]
+
+     metadata = {}
+     metadata["tags"] = [
+         env_name,
+         "reinforce",
+         "reinforcement-learning",
+         "custom-implementation",
+         "deep-rl-class"
+     ]
+
+     # Add metrics
+     eval = metadata_eval_result(
+         model_pretty_name=repo_name,
+         task_pretty_name="reinforcement-learning",
+         task_id="reinforcement-learning",
+         metrics_pretty_name="mean_reward",
+         metrics_id="mean_reward",
+         metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
+         dataset_pretty_name=env_name,
+         dataset_id=env_name,
+     )
+
+     # Merge both dictionaries
+     metadata = {**metadata, **eval}
+
+     model_card = f"""
+   # **Reinforce** Agent playing **{env_id}**
+   This is a trained model of a **Reinforce** agent playing **{env_id}**.
+   To learn to use this model and train yours check Unit 4 of the Deep Reinforcement Learning Course: https://huggingface.co/deep-rl-course/unit4/introduction
+   """
+
+     readme_path = "README.md"
+     readme = model_card
+     with open(readme_path, "w", encoding="utf-8") as f:
+         f.write(readme)
+
+     # Save our metrics to Readme metadata
+     metadata_save(readme_path, metadata)
+
+     # Step 7. Push everything to the Hub
+     api.upload_folder(
+         repo_id=repo_id,
+         folder_path=".",
+         path_in_repo=".",
+     )
+
+     print(f"Your model is pushed to the Hub. You can view your model here: {repo_url}")
+
+
+ repo_id = "cyrodw/Reinforce-Pixelcopter"  # TODO Define your repo id {username/Reinforce-{model-id}}
+ push_to_hub(repo_id,
+             pixelcopter_policy,  # The model we want to save
+             pixelcopter_hyperparameters,  # Hyperparameters
+             )
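Note that Step 4 of `push_to_hub` hard-codes `mean_reward, std_reward = 0, 0` rather than rolling out `eval_env`, which is why README.md and results.json in this commit report 0.00. A hedged sketch of what an evaluation pass could look like with the same old-style `gym` API used above (the `evaluate_agent` name and signature are illustrative, not part of this commit):

```python
import numpy as np

def evaluate_agent(env, max_steps, n_eval_episodes, policy):
    # Roll out the policy for n_eval_episodes and report mean/std of the episode returns.
    episode_rewards = []
    for _ in range(n_eval_episodes):
        state = env.reset()
        total_reward = 0.0
        for _ in range(max_steps):
            action, _ = policy.act(state)
            state, reward, done, _ = env.step(action)  # old gym 4-tuple step API, as in main.py
            total_reward += reward
            if done:
                break
        episode_rewards.append(total_reward)
    return np.mean(episode_rewards), np.std(episode_rewards)
```

If wired into Step 4 (e.g. `mean_reward, std_reward = evaluate_agent(eval_env, hyperparameters["max_t"], hyperparameters["n_evaluation_episodes"], model)`), the model-card metric and results.json would reflect actual episode returns instead of zeros.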
model.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:55025a9fff5526ca57e6dc693ebc675a17d81762f8b7b24e7ddace646a8edd9b
+ oid sha256:d8cec34e623aac7a14072410390f26a7ea6c16ea4382739edc385a26fb73e7b8
  size 39239
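model.pt appears here only as a Git LFS pointer; `main.py` writes it with `torch.save(model, "model.pt")`, i.e. the whole pickled module rather than a `state_dict`, so loading it back requires the `Policy` class definition to be importable. A minimal loading sketch under that assumption:

```python
import torch

# Load the pickled Policy module saved by main.py; the Policy class must be
# importable under the same module path at load time for unpickling to succeed.
model = torch.load("model.pt", map_location=torch.device("cpu"))
model.eval()
```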
requirements.txt ADDED
@@ -0,0 +1 @@
+ gymnasium
results.json CHANGED
@@ -1 +1 @@
- {"env_id": "Pixelcopter-PLE-v0", "mean_reward": 9.2, "n_evaluation_episodes": 10, "eval_datetime": "2023-05-10T12:37:14.411368"}
+ {"env_id": "Pixelcopter-PLE-v0", "mean_reward": 0, "n_evaluation_episodes": 10, "eval_datetime": "2023-05-16T15:12:27.011351"}