marinone94 committed on
Commit
e417b0c
1 Parent(s): 0eb0b1e

Use a Python script to clone the repo and trigger training

Browse files
run_sm.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ Python script that triggers sagemaker flow"""
2
+
3
+ import sys
4
+ import subprocess
5
+
6
+ def main():
7
+ # Let's skip arg names
8
+ repo = sys.argv[2]
9
+ repo_name = repo.split('/')[-1]
10
+ script_name = sys.argv[4]
11
+ cmd = f'git clone {repo} && cd {repo_name} && sh {script_name}'
12
+ # subprocess.call(cmd, shell=True)
13
+ raise ValueError(cmd)
14
+
15
+ if __name__ == '__main__':
16
+ main()
run_speech_recognition_seq2seq_streaming.py CHANGED
@@ -19,8 +19,7 @@ with 🤗 Datasets' streaming mode.
19
  """
20
  # You can also adapt this script for your own sequence to sequence speech
21
  # recognition task. Pointers for this are left as comments.
22
- import os
23
- raise RuntimeError(f"{os.getcwd()}")
24
  import json
25
  import logging
26
  import os
 
19
  """
20
  # You can also adapt this script for your own sequence to sequence speech
21
  # recognition task. Pointers for this are left as comments.
22
+
 
23
  import json
24
  import logging
25
  import os
sm.py CHANGED
@@ -13,7 +13,7 @@ TEST = True
13
 
14
 
15
  test_sm_instances = {
16
- "ml.g4dn.2xlarge":
17
  {
18
  "num_instances": 1,
19
  "num_gpus": 1
@@ -30,7 +30,7 @@ full_sm_instances = {
30
 
31
  sm_instances = test_sm_instances if TEST else full_sm_instances
32
 
33
- ENTRY_POINT = "run_speech_recognition_seq2seq_streaming.py"
34
  RUN_SCRIPT = "test_run.sh" if TEST else "run.sh"
35
  IMAGE_URI = "116817510867.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-training:whisper-finetuning-0223e276db78adf4ea4dc5f874793cb2"
36
  if IMAGE_URI is None:
@@ -66,7 +66,6 @@ def parse_run_script():
66
  .replace("\n", "") \
67
  .replace('"', "")
68
  line = line.split("=")
69
- # remove '\t--'
70
  key = str(line[0])
71
  try:
72
  value = line[1]
@@ -78,8 +77,8 @@ def parse_run_script():
78
 
79
 
80
  set_creds()
81
- hyperparameters = parse_run_script()
82
- pprint(hyperparameters)
83
 
84
  hf_token = os.environ.get("HF_TOKEN")
85
  if hf_token is None:
@@ -93,6 +92,10 @@ env_vars = {
93
  }
94
  pprint(env_vars)
95
  repo = f"https://huggingface.co/marinone94/{os.getcwd().split('/')[-1]}"
 
 
 
 
96
  for sm_instance_name, sm_instance_values in sm_instances.items():
97
  num_instances: int = \
98
  int(sm_instance_values["num_instances"])
 
13
 
14
 
15
  test_sm_instances = {
16
+ "ml.g4dn.xlarge":
17
  {
18
  "num_instances": 1,
19
  "num_gpus": 1
 
30
 
31
  sm_instances = test_sm_instances if TEST else full_sm_instances
32
 
33
+ ENTRY_POINT = "run_sm.py"
34
  RUN_SCRIPT = "test_run.sh" if TEST else "run.sh"
35
  IMAGE_URI = "116817510867.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-training:whisper-finetuning-0223e276db78adf4ea4dc5f874793cb2"
36
  if IMAGE_URI is None:
 
66
  .replace("\n", "") \
67
  .replace('"', "")
68
  line = line.split("=")
 
69
  key = str(line[0])
70
  try:
71
  value = line[1]
 
77
 
78
 
79
  set_creds()
80
+ # hyperparameters = parse_run_script()
81
+ # pprint(hyperparameters)
82
 
83
  hf_token = os.environ.get("HF_TOKEN")
84
  if hf_token is None:
 
92
  }
93
  pprint(env_vars)
94
  repo = f"https://huggingface.co/marinone94/{os.getcwd().split('/')[-1]}"
95
+ hyperparameters = {
96
+ "repo": repo,
97
+ "entrypoint": RUN_SCRIPT
98
+ }
99
  for sm_instance_name, sm_instance_values in sm_instances.items():
100
  num_instances: int = \
101
  int(sm_instance_values["num_instances"])