oliverdk committed on 1 day ago

Commit

995430d

•

1 Parent(s): d4af455

End of training

Browse files

Files changed (23) hide show

.hydra/config.yaml +17 -0
.hydra/hydra.yaml +182 -0
.hydra/overrides.yaml +1 -0
README.md +76 -0
added_tokens.json +40 -0
config.json +54 -0
configuration_code_gen_measuremet_pred.py +11 -0
configuration_measurement_pred.py +26 -0
logs/events.out.tfevents.1734404060.gail.ist.berkeley.edu.216960.0 +3 -0
merges.txt +0 -0
model.safetensors +3 -0
modeling_code_gen_measurement_pred.py +13 -0
modeling_measurement_pred.py +104 -0
sensor_loc_finder.py +17 -0
sensor_loc_reg.py +10 -0
sensor_loc_stories.py +46 -0
sensor_locs_from_token.py +16 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer_config.json +327 -0
train.log +1 -0
training_args.bin +3 -0
vocab.json +0 -0

.hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,17 @@

+model:
+  dataset_name: redwoodresearch/diamonds-seed7
+  model_type: codegen
+  pretrained_model_name: Salesforce/codegen-350M-mono
+  max_length: 1024
+hparams:
+  learning_rate: 2.0e-05
+  weight_decay: 0.02
+  lr_scheduler_type: cosine
+  warmup_steps: 64
+  effective_batch_size: 32
+  num_train_epochs: 5
+per_device_train_batch_size: 4
+per_device_eval_batch_size: 4
+fp16: true
+dataset_len: null
+push_to_hub: true

.hydra/hydra.yaml ADDED Viewed

	@@ -0,0 +1,182 @@

+hydra:
+  run:
+    dir: outputs/${now:%Y-%m-%d}/${now:%H-%M-%S}
+  sweep:
+    dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S}
+    subdir: ${hydra.job.num}
+  launcher:
+    submitit_folder: ${hydra.sweep.dir}/.submitit/%j
+    timeout_min: 1440
+    cpus_per_task: null
+    gpus_per_node: null
+    tasks_per_node: 1
+    mem_gb: 16
+    nodes: 1
+    name: ${hydra.job.name}
+    stderr_to_stdout: false
+    _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
+    partition: null
+    qos: high
+    comment: null
+    constraint: null
+    exclude: ddpg.ist.berkeley.edu,dqn.ist.berkeley.edu
+    gres: gpu:A6000:1
+    cpus_per_gpu: null
+    gpus_per_task: null
+    mem_per_gpu: null
+    mem_per_cpu: null
+    account: null
+    signal_delay_s: 120
+    max_num_timeout: 0
+    additional_parameters: {}
+    array_parallelism: 256
+    setup: null
+  sweeper:
+    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
+    max_batch_size: null
+    params: null
+  help:
+    app_name: ${hydra.job.name}
+    header: '${hydra.help.app_name} is powered by Hydra.
+      '
+    footer: 'Powered by Hydra (https://hydra.cc)
+      Use --hydra-help to view Hydra specific help
+      '
+    template: '${hydra.help.header}
+      == Configuration groups ==
+      Compose your configuration from those groups (group=option)
+      $APP_CONFIG_GROUPS
+      == Config ==
+      Override anything in the config (foo.bar=value)
+      $CONFIG
+      ${hydra.help.footer}
+      '
+  hydra_help:
+    template: 'Hydra (${hydra.runtime.version})
+      See https://hydra.cc for more info.
+      == Flags ==
+      $FLAGS_HELP
+      == Configuration groups ==
+      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
+      to command line)
+      $HYDRA_CONFIG_GROUPS
+      Use ''--cfg hydra'' to Show the Hydra config.
+      '
+    hydra_help: ???
+  hydra_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][HYDRA] %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+    root:
+      level: INFO
+      handlers:
+      - console
+    loggers:
+      logging_example:
+        level: DEBUG
+    disable_existing_loggers: false
+  job_logging:
+    version: 1
+    formatters:
+      simple:
+        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
+    handlers:
+      console:
+        class: logging.StreamHandler
+        formatter: simple
+        stream: ext://sys.stdout
+      file:
+        class: logging.FileHandler
+        formatter: simple
+        filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log
+    root:
+      level: INFO
+      handlers:
+      - console
+      - file
+    disable_existing_loggers: false
+  env: {}
+  mode: MULTIRUN
+  searchpath: []
+  callbacks: {}
+  output_subdir: .hydra
+  overrides:
+    hydra:
+    - hydra.mode=MULTIRUN
+    task:
+    - model.dataset_name=redwoodresearch/diamonds-seed7
+  job:
+    name: train
+    chdir: null
+    override_dirname: model.dataset_name=redwoodresearch/diamonds-seed7
+    id: '746836'
+    num: 0
+    config_name: codegen_diamonds_slurm
+    env_set: {}
+    env_copy: []
+    config:
+      override_dirname:
+        kv_sep: '='
+        item_sep: ','
+        exclude_keys: []
+  runtime:
+    version: 1.3.2
+    version_base: '1.1'
+    cwd: /nas/ucb/oliveradk/measurement-pred
+    config_sources:
+    - path: hydra.conf
+      schema: pkg
+      provider: hydra
+    - path: /nas/ucb/oliveradk/measurement-pred/conf
+      schema: file
+      provider: main
+    - path: ''
+      schema: structured
+      provider: schema
+    output_dir: /nas/ucb/oliveradk/measurement-pred/multirun/2024-12-16/18-53-11/0
+    choices:
+      hparams: hparams
+      model: codegen_diamonds
+      hydra/env: default
+      hydra/callbacks: null
+      hydra/job_logging: default
+      hydra/hydra_logging: default
+      hydra/hydra_help: default
+      hydra/help: default
+      hydra/sweeper: basic
+      hydra/launcher: slurm_chai
+      hydra/output: default
+  verbose: false

.hydra/overrides.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ - model.dataset_name=redwoodresearch/diamonds-seed7

README.md ADDED Viewed

	@@ -0,0 +1,76 @@

+---
+license: bsd-3-clause
+base_model: Salesforce/codegen-350M-mono
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: codegen-350M-mono-measurement_pred-diamonds-seed7
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# codegen-350M-mono-measurement_pred-diamonds-seed7
+This model is a fine-tuned version of [Salesforce/codegen-350M-mono](https://huggingface.co/Salesforce/codegen-350M-mono) on the `redwoodresearch/diamonds-seed7` dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.4759
+- Accuracy: 0.9018
+- Accuracy Sensor 0: 0.9093
+- Auroc Sensor 0: 0.9563
+- Accuracy Sensor 1: 0.9046
+- Auroc Sensor 1: 0.9558
+- Accuracy Sensor 2: 0.9110
+- Auroc Sensor 2: 0.9461
+- Accuracy Aggregated: 0.8822
+- Auroc Aggregated: 0.9403
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 32
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 64
+- num_epochs: 5
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy | Accuracy Sensor 0 | Auroc Sensor 0 | Accuracy Sensor 1 | Auroc Sensor 1 | Accuracy Sensor 2 | Auroc Sensor 2 | Accuracy Aggregated | Auroc Aggregated |
+|:-------------:|:------:|:----:|:---------------:|:--------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-----------------:|:--------------:|:-------------------:|:----------------:|
+| 0.3029        | 0.9997 | 781  | 0.5009          | 0.7947   | 0.7920            | 0.8988         | 0.7962            | 0.9030         | 0.8191            | 0.8947         | 0.7717              | 0.8803           |
+| 0.2099        | 1.9994 | 1562 | 0.4386          | 0.8330   | 0.8430            | 0.9267         | 0.8214            | 0.9266         | 0.8523            | 0.9287         | 0.8154              | 0.9148           |
+| 0.1366        | 2.9990 | 2343 | 0.3970          | 0.8638   | 0.8850            | 0.9499         | 0.8800            | 0.9485         | 0.8568            | 0.9428         | 0.8336              | 0.9330           |
+| 0.0719        | 4.0    | 3125 | 0.3534          | 0.9090   | 0.9121            | 0.9578         | 0.9090            | 0.9575         | 0.9209            | 0.9470         | 0.8940              | 0.9424           |
+| 0.0379        | 4.9984 | 3905 | 0.4759          | 0.9018   | 0.9093            | 0.9563         | 0.9046            | 0.9558         | 0.9110            | 0.9461         | 0.8822              | 0.9403           |
+### Framework versions
+- Transformers 4.41.0
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1

added_tokens.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "\t\t": 50294,
+  "\t\t\t": 50293,
+  "\t\t\t\t": 50292,
+  "\t\t\t\t\t": 50291,
+  "\t\t\t\t\t\t": 50290,
+  "\t\t\t\t\t\t\t": 50289,
+  "\t\t\t\t\t\t\t\t": 50288,
+  "\t\t\t\t\t\t\t\t\t": 50287,
+  "  ": 50286,
+  "   ": 50285,
+  "    ": 50284,
+  "     ": 50283,
+  "      ": 50282,
+  "       ": 50281,
+  "        ": 50280,
+  "         ": 50279,
+  "          ": 50278,
+  "           ": 50277,
+  "            ": 50276,
+  "             ": 50275,
+  "              ": 50274,
+  "               ": 50273,
+  "                ": 50272,
+  "                 ": 50271,
+  "                  ": 50270,
+  "                   ": 50269,
+  "                    ": 50268,
+  "                     ": 50267,
+  "                      ": 50266,
+  "                       ": 50265,
+  "                        ": 50264,
+  "                         ": 50263,
+  "                          ": 50262,
+  "                           ": 50261,
+  "                            ": 50260,
+  "                             ": 50259,
+  "                              ": 50258,
+  "                               ": 50257
+}

config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "_name_or_path": "Salesforce/codegen-350M-mono",
+  "activation_function": "gelu_new",
+  "aggregate_weight": 0.3,
+  "architectures": [
+    "CodeGenMeasurementPredictor"
+  ],
+  "attn_pdrop": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_code_gen_measuremet_pred.CodeGenMeasurementPredictorConfig",
+    "AutoModelForSequenceClassification": "modeling_code_gen_measurement_pred.CodeGenMeasurementPredictor"
+  },
+  "bos_token_id": 1,
+  "emb_dim": 1024,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "codegen_mp",
+  "n_ctx": 2048,
+  "n_embd": 1024,
+  "n_head": 16,
+  "n_inner": null,
+  "n_layer": 20,
+  "n_positions": 2048,
+  "n_sensors": 3,
+  "resid_pdrop": 0.0,
+  "rotary_dim": 32,
+  "scale_attn_weights": true,
+  "sensor_loc_type": "locs_from_token",
+  "sensor_token": " omit",
+  "sensor_token_id": 42848,
+  "sensors_weight": 0.7,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50,
+      "temperature": 1.0
+    }
+  },
+  "tie_word_embeddings": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.0",
+  "use_aggregated": true,
+  "use_cache": false,
+  "vocab_size": 51200
+}

configuration_code_gen_measuremet_pred.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from transformers.models.codegen import CodeGenConfig
+from .configuration_measurement_pred import MeasurementPredictorConfig
+class CodeGenMeasurementPredictorConfig(MeasurementPredictorConfig, CodeGenConfig):
+    """Configuration that combines the measurement-predictor options with the CodeGen model config."""
+    model_type = "codegen_mp"
+    def __init__(self, **kwargs):
+        # Always force sensor_token_id to this value, overriding anything the caller passed.
+        # NOTE(review): presumably the CodeGen vocabulary id of the default sensor token " omit" - confirm.
+        kwargs["sensor_token_id"] = 42848
+        super().__init__(**kwargs)
+    def get_emb_dim(self):
+        """Return the embedding width, read from the ``n_embd`` attribute of this config."""
+        return self.n_embd

configuration_measurement_pred.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from abc import abstractmethod
+from transformers import PretrainedConfig
+class MeasurementPredictorConfig(PretrainedConfig):
+    """Base configuration holding the options of a measurement predictor.
+
+    Args:
+        sensor_token: Text of the token that marks a sensor position.
+        sensor_loc_type: Name of the sensor-location strategy to use.
+        n_sensors: Number of sensors per example.
+        use_aggregated: Whether an aggregate prediction is enabled.
+        sensors_weight: Weight stored for the per-sensor loss term.
+        aggregate_weight: Weight stored for the aggregate loss term.
+        **kwargs: Forwarded unchanged to the parent config constructor.
+
+    Subclasses must implement ``get_emb_dim``.
+    """
+    def __init__(
+        self,
+        sensor_token=" omit",
+        sensor_loc_type="locs_from_token",
+        n_sensors=3,
+        use_aggregated=True,
+        sensors_weight = 0.7,
+        aggregate_weight=0.3,
+        **kwargs
+    ):
+        self.sensor_token = sensor_token
+        self.sensor_loc_type = sensor_loc_type
+        self.n_sensors = n_sensors
+        self.use_aggregated = use_aggregated
+        self.sensors_weight = sensors_weight
+        self.aggregate_weight = aggregate_weight
+        super().__init__(**kwargs)
+        # Computed after the parent constructor has run, so get_emb_dim() can read
+        # attributes that the parent initialisers set.
+        self.emb_dim = self.get_emb_dim()
+    # NOTE(review): no ABC metaclass is visible on this class, so @abstractmethod is
+    # probably not enforced at instantiation; the NotImplementedError below is the guard.
+    @abstractmethod
+    def get_emb_dim(self):
+        """Return the embedding dimension of the underlying model; subclasses override this."""
+        raise NotImplementedError

logs/events.out.tfevents.1734404060.gail.ist.berkeley.edu.216960.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:72d3fdbfdf75f1684f8505966782e4885aab6457cc52b9a62799be78a0b657be
+size 16069

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2eed6800bb2fdf0697d992730a4095081fcfaf6283946981189adb674638bd38
+size 1216963976

modeling_code_gen_measurement_pred.py ADDED Viewed

	@@ -0,0 +1,13 @@

+from transformers.models.codegen import CodeGenPreTrainedModel, CodeGenModel
+from .modeling_measurement_pred import MeasurementPredictorMixin
+from .configuration_code_gen_measuremet_pred import CodeGenMeasurementPredictorConfig
+class CodeGenMeasurementPredictor(CodeGenPreTrainedModel, MeasurementPredictorMixin):
+    """Measurement predictor that uses a ``CodeGenModel`` as its transformer backbone."""
+    config_class = CodeGenMeasurementPredictorConfig
+    def __init__(self, config):
+        # Cooperative init: runs the initialisers of both base classes along the MRO.
+        super().__init__(config)
+        # Backbone stored under the attribute name `transformer`.
+        self.transformer = CodeGenModel(config)
+        self.post_init()

modeling_measurement_pred.py ADDED Viewed

	@@ -0,0 +1,104 @@

+from typing import Optional, Tuple, Union
+import torch
+from torch.nn import BCEWithLogitsLoss
+from transformers import PreTrainedModel, PreTrainedTokenizer
+from transformers.tokenization_utils_base import PreTrainedTokenizerBase
+from transformers.modeling_outputs import BaseModelOutputWithPast, SequenceClassifierOutputWithPast
+from .sensor_loc_reg import SENSOR_LOC_REGISTRY
+from .sensor_loc_finder import SensorLocFinder
+class MeasurementPredictorMixin(PreTrainedModel):
+    def __init__(self, config):
+        super().__init__(config)
+        self.sensor_loc_type = config.sensor_loc_type
+        self.sensor_token = config.sensor_token
+        self.n_sensors = config.n_sensors
+        self.sensor_probes = torch.nn.ModuleList([
+            torch.nn.Linear(config.emb_dim, 1) for _ in range(config.n_sensors)
+        ])
+        self.use_aggregated = config.use_aggregated
+        if config.use_aggregated:
+            self.aggregate_probe = torch.nn.Linear(config.emb_dim, 1)
+        self.sensors_weight = config.sensors_weight
+        self.aggregate_weight = config.aggregate_weight
+        self.get_sensor_locs: SensorLocFinder = None
+    def init_sensor_loc_finder(self, tokenizer: PreTrainedTokenizerBase):
+        self.get_sensor_locs = SENSOR_LOC_REGISTRY[self.sensor_loc_type](
+            tokenizer, sensor_token=self.sensor_token, n_sensors=self.n_sensors
+        )
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[Tuple[Tuple[torch.Tensor]]] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
+            `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size]` All labels set to `-100`
+            are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]`
+        """
+        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        base_model_output: BaseModelOutputWithPast = self.base_model(
+            input_ids,
+            past_key_values=past_key_values,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            head_mask=head_mask,
+            inputs_embeds=inputs_embeds,
+            use_cache=use_cache,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            return_dict=return_dict,
+        )
+        sensor_locs = self.get_sensor_locs(input_ids)
+        sensor_embs = base_model_output.last_hidden_state.gather(
+            1, sensor_locs.unsqueeze(-1).expand(-1, -1, self.config.emb_dim)
+        )
+        assert sensor_embs.shape == (input_ids.shape[0], self.n_sensors, self.config.emb_dim), f"{sensor_embs.shape} != {(input_ids.shape[0], self.n_sensors, self.config.emb_dim)}"
+        sensor_logits = torch.concat([self.sensor_probes[i](sensor_embs[:, i, :])
+                               for i in range(self.n_sensors)], dim=-1)
+        logits = sensor_logits
+        if self.use_aggregated:
+            last_emb = base_model_output.last_hidden_state[:, -1, :]
+            aggregate_logits = self.aggregate_probe(last_emb)
+            logits = torch.concat([logits, aggregate_logits], dim=-1)
+        loss = None
+        if labels is not None:
+            loss_fct = BCEWithLogitsLoss()
+            sensor_loss = loss_fct(sensor_logits, labels[:, :self.n_sensors]) * self.sensors_weight
+            loss = sensor_loss
+            if self.use_aggregated: #TOOD: should be use aggregate
+                aggregate_loss = loss_fct(aggregate_logits, labels[:, -1:]) * self.aggregate_weight
+                loss += aggregate_loss
+        if not return_dict:
+            output = (logits, ) + base_model_output[1:]
+            return ((loss,) + output) if loss is not None else output
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=logits,
+            past_key_values=base_model_output.past_key_values,
+            hidden_states=base_model_output.hidden_states,
+            attentions=base_model_output.attentions,
+        )

sensor_loc_finder.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from abc import ABC, abstractmethod
+import torch
+from transformers import PreTrainedTokenizerBase
+class SensorLocFinder(ABC):
+    """Abstract interface for objects that map a batch of token ids to sensor positions.
+
+    Instances are callable: calling one delegates to ``find_sensor_locs``.
+    """
+    @abstractmethod
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, **kwargs):
+        """Set up the finder from a tokenizer; extra options arrive as keyword arguments."""
+        pass
+    @abstractmethod
+    def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
+        """Return a tensor of sensor positions for ``input_ids``; implemented by subclasses."""
+        pass
+    def __call__(self, input_ids: torch.Tensor) -> torch.Tensor:
+        """Shorthand for ``find_sensor_locs(input_ids)``."""
+        return self.find_sensor_locs(input_ids)

sensor_loc_reg.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from enum import Enum
+from .sensor_loc_stories import StoriesSensorLocFinder
+from .sensor_locs_from_token import SensorLocFinderFromToken
+# Maps a sensor-location strategy name to the finder class that implements it.
+SENSOR_LOC_REGISTRY = {
+    "stories": StoriesSensorLocFinder,
+    "locs_from_token": SensorLocFinderFromToken
+}

sensor_loc_stories.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import torch
+from transformers import PreTrainedTokenizerBase
+from .sensor_loc_finder import SensorLocFinder
+class StoriesSensorLocFinder(SensorLocFinder):
+    """Finds sensor positions as question-mark tokens at or after a "## Questions" header."""
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, **kwargs):
+        """Pre-compute the token ids this finder searches for; ``kwargs`` are ignored."""
+        self.questions_section_toks = tokenizer.encode("## Questions")
+        # Only the first token id of each encoding is kept.
+        self.question_mark_tok = tokenizer.encode("?")[0]
+        self.other_question_mark_tok = tokenizer.encode(")?")[0]
+        # The header match in _is_sensor_loc compares exactly two consecutive tokens.
+        assert len(self.questions_section_toks) == 2
+    def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
+        """Return, per row, the positions at which the 1st, 2nd and 3rd sensor tokens occur.
+
+        Asserts that every row contains exactly three sensor positions. Returned
+        positions are clamped to at most ``input_ids.shape[-1] - 3``.
+        """
+        device = input_ids.device
+        question_mark_locs = self._is_sensor_loc(input_ids)
+        # Running count of sensor positions seen so far along the sequence axis.
+        total_locs = torch.cumsum(question_mark_locs, dim=-1)
+        total_overall = total_locs[:, -1]
+        assert (
+            total_overall == 3
+        ).all(), "can handle different cases, but assuming this is easiest"
+        # eqs[b, t, k] is True where the running count at position t equals k + 1.
+        eqs = total_locs[:, :, None] == torch.arange(1, 4)[None, None].to(device)
+        # argmax over the sequence axis selects a position where the count equals k + 1
+        # (presumably the first such position - confirm against torch.argmax tie behaviour);
+        # when the count is never reached, fall back to shape[-1] - 3.
+        locs = torch.where(
+            eqs.any(dim=-2),
+            torch.argmax(eqs.to(torch.uint8), dim=-2),
+            input_ids.shape[-1] - 3,
+        ).clamp(max=input_ids.shape[-1] - 3)
+        return locs
+    def _is_sensor_loc(self, input_ids: torch.Tensor):
+        """Return a boolean mask of question-mark tokens located at or after the header start.
+
+        Asserts that the two-token header occurs exactly once in every row.
+        """
+        questions_section_toks = self.questions_section_toks
+        question_mark_tok = self.question_mark_tok
+        other_question_mark_tok = self.other_question_mark_tok
+        # True at position i when tokens i and i + 1 match the two header tokens.
+        eq_question_item = (input_ids[:, :-1] == questions_section_toks[0]) & (
+            input_ids[:, 1:] == questions_section_toks[1]
+        )
+        assert (eq_question_item.sum(dim=-1, dtype=torch.int) == 1).all(), "could relax"
+        # Re-append the last column so the cumulative sum has the full sequence length;
+        # it is > 0 from the header start onwards.
+        summed = torch.cumsum(
+            torch.cat([eq_question_item, eq_question_item[:, -1:]], dim=-1), dim=-1
+        )
+        return (summed > 0) & (
+            (input_ids == question_mark_tok) | (input_ids == other_question_mark_tok)
+        )

sensor_locs_from_token.py ADDED Viewed

	@@ -0,0 +1,16 @@

+import torch
+from transformers import PreTrainedTokenizerBase
+from .sensor_loc_finder import SensorLocFinder
+class SensorLocFinderFromToken(SensorLocFinder):
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, sensor_token: str, n_sensors: int):
+        self.sensor_token_id = tokenizer.encode(sensor_token)[0]
+        self.n_sensors = n_sensors
+    def find_sensor_locs(self, input_ids: torch.Tensor) -> torch.Tensor:
+        flat_sensor_token_idxs = (input_ids == self.sensor_token_id).nonzero(as_tuple=True)[1]
+        sensor_token_idxs = flat_sensor_token_idxs.view(-1, self.n_sensors)
+        return sensor_token_idxs

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,327 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "                               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "                              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "                             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "                            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50261": {
+      "content": "                           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50262": {
+      "content": "                          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50263": {
+      "content": "                         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50264": {
+      "content": "                        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50265": {
+      "content": "                       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50266": {
+      "content": "                      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50267": {
+      "content": "                     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50268": {
+      "content": "                    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50269": {
+      "content": "                   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50270": {
+      "content": "                  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50271": {
+      "content": "                 ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50272": {
+      "content": "                ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50273": {
+      "content": "               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50274": {
+      "content": "              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50275": {
+      "content": "             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50276": {
+      "content": "            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50277": {
+      "content": "           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50278": {
+      "content": "          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50279": {
+      "content": "         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50280": {
+      "content": "        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50281": {
+      "content": "       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50282": {
+      "content": "      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50283": {
+      "content": "     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50284": {
+      "content": "    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50285": {
+      "content": "   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50286": {
+      "content": "  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50287": {
+      "content": "\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50288": {
+      "content": "\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50289": {
+      "content": "\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50290": {
+      "content": "\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50291": {
+      "content": "\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50292": {
+      "content": "\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50293": {
+      "content": "\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50294": {
+      "content": "\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "left",
+  "return_token_type_ids": false,
+  "tokenizer_class": "CodeGenTokenizer",
+  "truncation_side": "left",
+  "unk_token": "<|endoftext|>"
+}

train.log ADDED Viewed

	@@ -0,0 +1 @@


1	+ [2024-12-16 18:54:19,529][accelerate.utils.other][WARNING] - Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e934d182467e275d01e51caa2e9e2886c0a9d04fa2b3d568b2fbe78aa009f50
+size 5112

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff