nielsbantilan committed on Aug 17, 2023

Commit

1aa01d1

•

1 Parent(s): c0dc2b5

Upload folder using huggingface_hub

Browse files

Files changed (23) hide show

.gitattributes +8 -0
flyte6a_2j95z/local_flytekit/2c23843e4a45ae5249c43c2c168a9827/00000 +3 -0
flyte9y8545zr/local_flytekit/4da14b1253b8076f6a95a51b0669110b/00000 +3 -0
flyte_training_config.json +1 -1
flyteeopmp26r/local_flytekit/8ed8cade608dc55fe9aaec6dd8d60c20/00000 +3 -0
flyteg7cwhdqx/local_flytekit/45e4b36c3a0889b39fe253c6b3c94b2f/00000 +3 -0
flyteraquk0cj/local_flytekit/776069c6405df68fd2755ce257e952ba/00000 +3 -0
flyterpqo54fv/local_flytekit/fd49b76dd3b1ffbc62b1efcef00fd674/00000 +3 -0
flyteyao8jgm7/local_flytekit/67696dba0a579df645b5b2f987a9e4b9/00000 +3 -0
flyteyfv3rs04/local_flytekit/65aa521dee1e8da3c795348937da23ed/00000 +3 -0
pytorch_model-00001-of-00003.bin +1 -1
pytorch_model-00002-of-00003.bin +1 -1
pytorch_model-00003-of-00003.bin +1 -1
tmp2uwb6tgl/_remote_module_non_scriptable.py +81 -0
tmp5210xtp5/_remote_module_non_scriptable.py +81 -0
tmp9bh3sdsi/_remote_module_non_scriptable.py +81 -0
tmphrsaxah2/_remote_module_non_scriptable.py +81 -0
tmpi_fed6hf/_remote_module_non_scriptable.py +81 -0
tmpn7s2kko7/_remote_module_non_scriptable.py +81 -0
tmptwgnkwb4/__pycache__/_remote_module_non_scriptable.cpython-310.pyc +0 -0
tmptwgnkwb4/_remote_module_non_scriptable.py +81 -0
trainer_state.json +598 -16
training_args.bin +1 -1

.gitattributes CHANGED Viewed

@@ -41,3 +41,11 @@ flytek6j4vh44/local_flytekit/b3abf2788c219ecf383e0ae59ed8b535/00000 filter=lfs d
 flyteknfii7qb/local_flytekit/52ebc6e9298c73dfffdc9f2fffeabb0e/00000 filter=lfs diff=lfs merge=lfs -text
 flytem4no92qv/local_flytekit/02d908a57a7bd02757ee87b4326523e8/00000 filter=lfs diff=lfs merge=lfs -text
 flytep9efi7h3/local_flytekit/ae679064ecc696d6e9c73a37fdb3edd8/00000 filter=lfs diff=lfs merge=lfs -text

 flyteknfii7qb/local_flytekit/52ebc6e9298c73dfffdc9f2fffeabb0e/00000 filter=lfs diff=lfs merge=lfs -text
 flytem4no92qv/local_flytekit/02d908a57a7bd02757ee87b4326523e8/00000 filter=lfs diff=lfs merge=lfs -text
 flytep9efi7h3/local_flytekit/ae679064ecc696d6e9c73a37fdb3edd8/00000 filter=lfs diff=lfs merge=lfs -text
+flyte6a_2j95z/local_flytekit/2c23843e4a45ae5249c43c2c168a9827/00000 filter=lfs diff=lfs merge=lfs -text
+flyte9y8545zr/local_flytekit/4da14b1253b8076f6a95a51b0669110b/00000 filter=lfs diff=lfs merge=lfs -text
+flyteeopmp26r/local_flytekit/8ed8cade608dc55fe9aaec6dd8d60c20/00000 filter=lfs diff=lfs merge=lfs -text
+flyteg7cwhdqx/local_flytekit/45e4b36c3a0889b39fe253c6b3c94b2f/00000 filter=lfs diff=lfs merge=lfs -text
+flyteraquk0cj/local_flytekit/776069c6405df68fd2755ce257e952ba/00000 filter=lfs diff=lfs merge=lfs -text
+flyterpqo54fv/local_flytekit/fd49b76dd3b1ffbc62b1efcef00fd674/00000 filter=lfs diff=lfs merge=lfs -text
+flyteyao8jgm7/local_flytekit/67696dba0a579df645b5b2f987a9e4b9/00000 filter=lfs diff=lfs merge=lfs -text
+flyteyfv3rs04/local_flytekit/65aa521dee1e8da3c795348937da23ed/00000 filter=lfs diff=lfs merge=lfs -text

flyte6a_2j95z/local_flytekit/2c23843e4a45ae5249c43c2c168a9827/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

flyte9y8545zr/local_flytekit/4da14b1253b8076f6a95a51b0669110b/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

flyte_training_config.json CHANGED Viewed

@@ -1 +1 @@

- {"base_model": "meta-llama/Llama-2-7b-hf", "data_path": "wikipedia", "data_name": "20220301.simple", "num_epochs": 1, "max_steps": 30, "learning_rate": 2e-05, "weight_decay": 0.02, "warmup_ratio": 0.03, "lr_scheduler_type": "cosine", "batch_size": 4, "micro_batch_size": 1, "val_set_size": 0, "group_by_length": false, "instruction_key": "instruction", "input_key": "input", "output_key": "output", "device_map": "auto", "cache_dir": null, "optim": "adamw_torch", "model_max_length": 512, "debug_mode": false, "debug_train_data_size": 1024, "wandb_project": ""}

+ {"base_model": "meta-llama/Llama-2-7b-hf", "data_path": "wikipedia", "data_name": "20220301.simple", "num_epochs": 1, "max_steps": 100, "learning_rate": 2e-05, "weight_decay": 0.02, "warmup_ratio": 0.03, "lr_scheduler_type": "cosine", "batch_size": 4, "micro_batch_size": 1, "val_set_size": 0, "group_by_length": false, "instruction_key": "instruction", "input_key": "input", "output_key": "output", "device_map": "auto", "cache_dir": null, "optim": "adamw_torch", "model_max_length": 512, "debug_mode": false, "debug_train_data_size": 1024, "wandb_project": ""}

flyteeopmp26r/local_flytekit/8ed8cade608dc55fe9aaec6dd8d60c20/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

flyteg7cwhdqx/local_flytekit/45e4b36c3a0889b39fe253c6b3c94b2f/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

flyteraquk0cj/local_flytekit/776069c6405df68fd2755ce257e952ba/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

flyterpqo54fv/local_flytekit/fd49b76dd3b1ffbc62b1efcef00fd674/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

flyteyao8jgm7/local_flytekit/67696dba0a579df645b5b2f987a9e4b9/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

flyteyfv3rs04/local_flytekit/65aa521dee1e8da3c795348937da23ed/00000 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:067772915d011157436dc1ea88cb38756555e25be2d07616d1ee97dfac6e6535
+size 133886409

pytorch_model-00001-of-00003.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:717aa6c5a9ebb23eee5bea4b43851739a7d682236bc51e101b2765a30e048a78
 size 9877982386

 version https://git-lfs.github.com/spec/v1
+oid sha256:0edb9f1a102ad6501ee570b17824779e345dad58fa4c0fee69b413296923668b
 size 9877982386

pytorch_model-00002-of-00003.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ef52dafc8593a489a56cef5484997b03f7c429d36d19523e3b7b7c73481ce5f
 size 9894793766

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0210300fa233e838ff00964ab3e48ed9d867c21001e52949992e0ec55ed3cff
 size 9894793766

pytorch_model-00003-of-00003.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0f6c03132289fc1e002046889f735e28e7d4503a8134e07f1e4e7e5a94fcdb8
 size 7180985861

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9608bedcb9fd77131a6d81629f8caddaa607b616ac9440b5f1b515bb1a705db
 size 7180985861

tmp2uwb6tgl/_remote_module_non_scriptable.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import *
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+module_interface_cls = None
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+_generated_methods = [
+    forward_async,
+    forward,
+]
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret

tmp5210xtp5/_remote_module_non_scriptable.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import *
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+module_interface_cls = None
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+_generated_methods = [
+    forward_async,
+    forward,
+]
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret

tmp9bh3sdsi/_remote_module_non_scriptable.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import *
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+module_interface_cls = None
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+_generated_methods = [
+    forward_async,
+    forward,
+]
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret

tmphrsaxah2/_remote_module_non_scriptable.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import *
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+module_interface_cls = None
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+_generated_methods = [
+    forward_async,
+    forward,
+]
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret

tmpi_fed6hf/_remote_module_non_scriptable.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import *
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+module_interface_cls = None
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+_generated_methods = [
+    forward_async,
+    forward,
+]
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret

tmpn7s2kko7/_remote_module_non_scriptable.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import *
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+module_interface_cls = None
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+_generated_methods = [
+    forward_async,
+    forward,
+]
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret

tmptwgnkwb4/__pycache__/_remote_module_non_scriptable.cpython-310.pyc ADDED Viewed

Binary file (1.5 kB). View file

tmptwgnkwb4/_remote_module_non_scriptable.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from typing import *
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+module_interface_cls = None
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+_generated_methods = [
+    forward_async,
+    forward,
+]
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret

trainer_state.json CHANGED Viewed

@@ -1,46 +1,628 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 13.333333333333334,
   "eval_steps": 500,
-  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 4.44,
       "learning_rate": 0,
-      "loss": 1.7045,
       "step": 10
     },
     {
       "epoch": 8.89,
       "learning_rate": 2e-05,
-      "loss": 1.6603,
       "step": 20
     },
     {
       "epoch": 13.33,
       "learning_rate": 2e-05,
-      "loss": 0.9123,
       "step": 30
     },
     {
-      "epoch": 13.33,
-      "step": 30,
-      "total_flos": 2021822300160.0,
-      "train_loss": 1.4256969451904298,
-      "train_runtime": 3249.3879,
-      "train_samples_per_second": 0.886,
-      "train_steps_per_second": 0.009
     }
   ],
-  "logging_steps": 10,
-  "max_steps": 30,
-  "num_train_epochs": 15,
   "save_steps": 200,
-  "total_flos": 2021822300160.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 44.44444444444444,
   "eval_steps": 500,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
+    {
+      "epoch": 0.44,
+      "learning_rate": 0,
+      "loss": 1.7341,
+      "step": 1
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 0,
+      "loss": 1.7223,
+      "step": 2
+    },
+    {
+      "epoch": 1.33,
+      "learning_rate": 0,
+      "loss": 1.7608,
+      "step": 3
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 0,
+      "loss": 1.7115,
+      "step": 4
+    },
+    {
+      "epoch": 2.22,
+      "learning_rate": 0,
+      "loss": 1.7181,
+      "step": 5
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 0,
+      "loss": 1.7022,
+      "step": 6
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 0,
+      "loss": 1.7242,
+      "step": 7
+    },
+    {
+      "epoch": 3.56,
+      "learning_rate": 0,
+      "loss": 1.7352,
+      "step": 8
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 0,
+      "loss": 1.7181,
+      "step": 9
+    },
     {
       "epoch": 4.44,
       "learning_rate": 0,
+      "loss": 1.7213,
       "step": 10
     },
+    {
+      "epoch": 4.89,
+      "learning_rate": 0,
+      "loss": 1.6694,
+      "step": 11
+    },
+    {
+      "epoch": 5.33,
+      "learning_rate": 0,
+      "loss": 1.7046,
+      "step": 12
+    },
+    {
+      "epoch": 5.78,
+      "learning_rate": 0,
+      "loss": 1.7109,
+      "step": 13
+    },
+    {
+      "epoch": 6.22,
+      "learning_rate": 0,
+      "loss": 1.6948,
+      "step": 14
+    },
+    {
+      "epoch": 6.67,
+      "learning_rate": 0,
+      "loss": 1.6816,
+      "step": 15
+    },
+    {
+      "epoch": 7.11,
+      "learning_rate": 0.0,
+      "loss": 1.6851,
+      "step": 16
+    },
+    {
+      "epoch": 7.56,
+      "learning_rate": 1.2618595071429148e-05,
+      "loss": 1.6041,
+      "step": 17
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 2e-05,
+      "loss": 1.5208,
+      "step": 18
+    },
+    {
+      "epoch": 8.44,
+      "learning_rate": 2e-05,
+      "loss": 1.4946,
+      "step": 19
+    },
     {
       "epoch": 8.89,
       "learning_rate": 2e-05,
+      "loss": 1.492,
       "step": 20
     },
+    {
+      "epoch": 9.33,
+      "learning_rate": 2e-05,
+      "loss": 1.4501,
+      "step": 21
+    },
+    {
+      "epoch": 9.78,
+      "learning_rate": 2e-05,
+      "loss": 1.1894,
+      "step": 22
+    },
+    {
+      "epoch": 10.22,
+      "learning_rate": 2e-05,
+      "loss": 1.1437,
+      "step": 23
+    },
+    {
+      "epoch": 10.67,
+      "learning_rate": 2e-05,
+      "loss": 1.02,
+      "step": 24
+    },
+    {
+      "epoch": 11.11,
+      "learning_rate": 2e-05,
+      "loss": 0.926,
+      "step": 25
+    },
+    {
+      "epoch": 11.56,
+      "learning_rate": 2e-05,
+      "loss": 0.7794,
+      "step": 26
+    },
+    {
+      "epoch": 12.0,
+      "learning_rate": 2e-05,
+      "loss": 0.7719,
+      "step": 27
+    },
+    {
+      "epoch": 12.44,
+      "learning_rate": 2e-05,
+      "loss": 0.6107,
+      "step": 28
+    },
+    {
+      "epoch": 12.89,
+      "learning_rate": 2e-05,
+      "loss": 0.633,
+      "step": 29
+    },
     {
       "epoch": 13.33,
       "learning_rate": 2e-05,
+      "loss": 0.4781,
       "step": 30
     },
     {
+      "epoch": 13.78,
+      "learning_rate": 2e-05,
+      "loss": 0.4379,
+      "step": 31
+    },
+    {
+      "epoch": 14.22,
+      "learning_rate": 2e-05,
+      "loss": 0.3391,
+      "step": 32
+    },
+    {
+      "epoch": 14.67,
+      "learning_rate": 2e-05,
+      "loss": 0.2928,
+      "step": 33
+    },
+    {
+      "epoch": 15.11,
+      "learning_rate": 2e-05,
+      "loss": 0.2631,
+      "step": 34
+    },
+    {
+      "epoch": 15.56,
+      "learning_rate": 2e-05,
+      "loss": 0.2399,
+      "step": 35
+    },
+    {
+      "epoch": 16.0,
+      "learning_rate": 2e-05,
+      "loss": 0.2075,
+      "step": 36
+    },
+    {
+      "epoch": 16.44,
+      "learning_rate": 2e-05,
+      "loss": 0.186,
+      "step": 37
+    },
+    {
+      "epoch": 16.89,
+      "learning_rate": 2e-05,
+      "loss": 0.1782,
+      "step": 38
+    },
+    {
+      "epoch": 17.33,
+      "learning_rate": 2e-05,
+      "loss": 0.144,
+      "step": 39
+    },
+    {
+      "epoch": 17.78,
+      "learning_rate": 2e-05,
+      "loss": 0.1317,
+      "step": 40
+    },
+    {
+      "epoch": 18.22,
+      "learning_rate": 2e-05,
+      "loss": 0.1144,
+      "step": 41
+    },
+    {
+      "epoch": 18.67,
+      "learning_rate": 2e-05,
+      "loss": 0.1193,
+      "step": 42
+    },
+    {
+      "epoch": 19.11,
+      "learning_rate": 2e-05,
+      "loss": 0.1161,
+      "step": 43
+    },
+    {
+      "epoch": 19.56,
+      "learning_rate": 2e-05,
+      "loss": 0.0993,
+      "step": 44
+    },
+    {
+      "epoch": 20.0,
+      "learning_rate": 2e-05,
+      "loss": 0.1083,
+      "step": 45
+    },
+    {
+      "epoch": 20.44,
+      "learning_rate": 2e-05,
+      "loss": 0.101,
+      "step": 46
+    },
+    {
+      "epoch": 20.89,
+      "learning_rate": 2e-05,
+      "loss": 0.1013,
+      "step": 47
+    },
+    {
+      "epoch": 21.33,
+      "learning_rate": 2e-05,
+      "loss": 0.1066,
+      "step": 48
+    },
+    {
+      "epoch": 21.78,
+      "learning_rate": 2e-05,
+      "loss": 0.1005,
+      "step": 49
+    },
+    {
+      "epoch": 22.22,
+      "learning_rate": 2e-05,
+      "loss": 0.0882,
+      "step": 50
+    },
+    {
+      "epoch": 22.67,
+      "learning_rate": 2e-05,
+      "loss": 0.1067,
+      "step": 51
+    },
+    {
+      "epoch": 23.11,
+      "learning_rate": 2e-05,
+      "loss": 0.0797,
+      "step": 52
+    },
+    {
+      "epoch": 23.56,
+      "learning_rate": 2e-05,
+      "loss": 0.0943,
+      "step": 53
+    },
+    {
+      "epoch": 24.0,
+      "learning_rate": 2e-05,
+      "loss": 0.0769,
+      "step": 54
+    },
+    {
+      "epoch": 24.44,
+      "learning_rate": 2e-05,
+      "loss": 0.0855,
+      "step": 55
+    },
+    {
+      "epoch": 24.89,
+      "learning_rate": 2e-05,
+      "loss": 0.0735,
+      "step": 56
+    },
+    {
+      "epoch": 25.33,
+      "learning_rate": 2e-05,
+      "loss": 0.0833,
+      "step": 57
+    },
+    {
+      "epoch": 25.78,
+      "learning_rate": 2e-05,
+      "loss": 0.0811,
+      "step": 58
+    },
+    {
+      "epoch": 26.22,
+      "learning_rate": 2e-05,
+      "loss": 0.0772,
+      "step": 59
+    },
+    {
+      "epoch": 26.67,
+      "learning_rate": 2e-05,
+      "loss": 0.0721,
+      "step": 60
+    },
+    {
+      "epoch": 27.11,
+      "learning_rate": 2e-05,
+      "loss": 0.0825,
+      "step": 61
+    },
+    {
+      "epoch": 27.56,
+      "learning_rate": 2e-05,
+      "loss": 0.0758,
+      "step": 62
+    },
+    {
+      "epoch": 28.0,
+      "learning_rate": 2e-05,
+      "loss": 0.0725,
+      "step": 63
+    },
+    {
+      "epoch": 28.44,
+      "learning_rate": 2e-05,
+      "loss": 0.077,
+      "step": 64
+    },
+    {
+      "epoch": 28.89,
+      "learning_rate": 2e-05,
+      "loss": 0.0654,
+      "step": 65
+    },
+    {
+      "epoch": 29.33,
+      "learning_rate": 2e-05,
+      "loss": 0.0675,
+      "step": 66
+    },
+    {
+      "epoch": 29.78,
+      "learning_rate": 2e-05,
+      "loss": 0.0772,
+      "step": 67
+    },
+    {
+      "epoch": 30.22,
+      "learning_rate": 2e-05,
+      "loss": 0.0718,
+      "step": 68
+    },
+    {
+      "epoch": 30.67,
+      "learning_rate": 2e-05,
+      "loss": 0.0625,
+      "step": 69
+    },
+    {
+      "epoch": 31.11,
+      "learning_rate": 2e-05,
+      "loss": 0.0616,
+      "step": 70
+    },
+    {
+      "epoch": 31.56,
+      "learning_rate": 2e-05,
+      "loss": 0.071,
+      "step": 71
+    },
+    {
+      "epoch": 32.0,
+      "learning_rate": 2e-05,
+      "loss": 0.0655,
+      "step": 72
+    },
+    {
+      "epoch": 32.44,
+      "learning_rate": 2e-05,
+      "loss": 0.0591,
+      "step": 73
+    },
+    {
+      "epoch": 32.89,
+      "learning_rate": 2e-05,
+      "loss": 0.0669,
+      "step": 74
+    },
+    {
+      "epoch": 33.33,
+      "learning_rate": 2e-05,
+      "loss": 0.0653,
+      "step": 75
+    },
+    {
+      "epoch": 33.78,
+      "learning_rate": 2e-05,
+      "loss": 0.0662,
+      "step": 76
+    },
+    {
+      "epoch": 34.22,
+      "learning_rate": 2e-05,
+      "loss": 0.0688,
+      "step": 77
+    },
+    {
+      "epoch": 34.67,
+      "learning_rate": 2e-05,
+      "loss": 0.0498,
+      "step": 78
+    },
+    {
+      "epoch": 35.11,
+      "learning_rate": 2e-05,
+      "loss": 0.0576,
+      "step": 79
+    },
+    {
+      "epoch": 35.56,
+      "learning_rate": 2e-05,
+      "loss": 0.0737,
+      "step": 80
+    },
+    {
+      "epoch": 36.0,
+      "learning_rate": 2e-05,
+      "loss": 0.0609,
+      "step": 81
+    },
+    {
+      "epoch": 36.44,
+      "learning_rate": 2e-05,
+      "loss": 0.0594,
+      "step": 82
+    },
+    {
+      "epoch": 36.89,
+      "learning_rate": 2e-05,
+      "loss": 0.0725,
+      "step": 83
+    },
+    {
+      "epoch": 37.33,
+      "learning_rate": 2e-05,
+      "loss": 0.0598,
+      "step": 84
+    },
+    {
+      "epoch": 37.78,
+      "learning_rate": 2e-05,
+      "loss": 0.0652,
+      "step": 85
+    },
+    {
+      "epoch": 38.22,
+      "learning_rate": 2e-05,
+      "loss": 0.0588,
+      "step": 86
+    },
+    {
+      "epoch": 38.67,
+      "learning_rate": 2e-05,
+      "loss": 0.0671,
+      "step": 87
+    },
+    {
+      "epoch": 39.11,
+      "learning_rate": 2e-05,
+      "loss": 0.0596,
+      "step": 88
+    },
+    {
+      "epoch": 39.56,
+      "learning_rate": 2e-05,
+      "loss": 0.0518,
+      "step": 89
+    },
+    {
+      "epoch": 40.0,
+      "learning_rate": 2e-05,
+      "loss": 0.0612,
+      "step": 90
+    },
+    {
+      "epoch": 40.44,
+      "learning_rate": 2e-05,
+      "loss": 0.0593,
+      "step": 91
+    },
+    {
+      "epoch": 40.89,
+      "learning_rate": 2e-05,
+      "loss": 0.0521,
+      "step": 92
+    },
+    {
+      "epoch": 41.33,
+      "learning_rate": 2e-05,
+      "loss": 0.0536,
+      "step": 93
+    },
+    {
+      "epoch": 41.78,
+      "learning_rate": 2e-05,
+      "loss": 0.0548,
+      "step": 94
+    },
+    {
+      "epoch": 42.22,
+      "learning_rate": 2e-05,
+      "loss": 0.0507,
+      "step": 95
+    },
+    {
+      "epoch": 42.67,
+      "learning_rate": 2e-05,
+      "loss": 0.0588,
+      "step": 96
+    },
+    {
+      "epoch": 43.11,
+      "learning_rate": 2e-05,
+      "loss": 0.0506,
+      "step": 97
+    },
+    {
+      "epoch": 43.56,
+      "learning_rate": 2e-05,
+      "loss": 0.055,
+      "step": 98
+    },
+    {
+      "epoch": 44.0,
+      "learning_rate": 2e-05,
+      "loss": 0.0503,
+      "step": 99
+    },
+    {
+      "epoch": 44.44,
+      "learning_rate": 2e-05,
+      "loss": 0.054,
+      "step": 100
+    },
+    {
+      "epoch": 44.44,
+      "step": 100,
+      "total_flos": 7478779576320.0,
+      "train_loss": 0.49326207719743254,
+      "train_runtime": 9902.4306,
+      "train_samples_per_second": 0.969,
+      "train_steps_per_second": 0.01
     }
   ],
+  "logging_steps": 1,
+  "max_steps": 100,
+  "num_train_epochs": 50,
   "save_steps": 200,
+  "total_flos": 7478779576320.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ae75cc4463643d503efe9965d07a683ae889e7742c053abd5aa9e79876df4bf
 size 6523

 version https://git-lfs.github.com/spec/v1
+oid sha256:135858efd4811a09b43593532cb735b8e0bee8450cc74446fdba3f0ec24a504a
 size 6523