Riddhi Bhagwat committed
Commit 1893204 · 1 Parent(s): 67312ac

organization of files & debugging reward_eval file

ml/eval/data_transform_pipeline.py DELETED
@@ -1,80 +0,0 @@
-import pandas as pd
-import numpy as np
-
-# NOTE: names of preset cols may be different based on dataset, this is just a generalized pipeline
-
-CHOSEN_COLUMN = 'chosen'  # name of col with chosen responses
-REJECTED_COLUMN = 'rejected'  # name of col with rejected responses
-COLUMNS_TO_DROP = ['metadata', 'timestamp', 'id']  # cols to remove
-
-def transform_rlhf_dataset(df, chosen_col=CHOSEN_COLUMN, rejected_col=REJECTED_COLUMN, drop_cols=COLUMNS_TO_DROP):
-    """
-    Parameters:
-        df (pandas.DataFrame): Input dataframe with chosen and rejected columns
-        chosen_col (str): Name of column containing chosen responses
-        rejected_col (str): Name of column containing rejected responses
-        drop_cols (list): List of column names to drop from the dataset
-
-    Returns:
-        pandas.DataFrame: Transformed dataset with 'text' and 'label' columns
-    """
-
-    df = df.copy()
-
-    existing_cols_to_drop = [col for col in drop_cols if col in df.columns]
-    if existing_cols_to_drop:
-        df = df.drop(columns=existing_cols_to_drop)
-
-    preserved_cols = [col for col in df.columns if col not in [chosen_col, rejected_col]]
-
-    # two separate dataframes for liked and disliked
-    liked_df = df[[chosen_col]].copy()
-    liked_df.columns = ['text']
-    liked_df['label'] = 'liked'
-
-    disliked_df = df[[rejected_col]].copy()
-    disliked_df.columns = ['text']
-    disliked_df['label'] = 'disliked'
-
-    for col in preserved_cols:
-        liked_df[col] = df[col]
-    for col in preserved_cols:
-        disliked_df[col] = df[col]
-
-    # combine + shuffle
-    transformed_df = pd.concat([liked_df, disliked_df], ignore_index=True)
-    transformed_df = transformed_df.dropna(subset=['text'])
-    transformed_df = transformed_df.sample(frac=1).reset_index(drop=True)
-
-    # reordering
-    column_order = ['text', 'label'] + preserved_cols
-    transformed_df = transformed_df[column_order]
-
-    return transformed_df
-
-def test_example():
-    example_data = {
-        'chosen': ['This is a good response', 'Another good one'],
-        'rejected': ['This is a bad response', 'Another bad one'],
-        'metadata': ['meta1', 'meta2'],
-        'timestamp': ['2024-01-01', '2024-01-02'],
-        'id': [1, 2]
-    }
-
-    df = pd.DataFrame(example_data)
-    transformed_df = transform_rlhf_dataset(
-        df,
-        chosen_col='chosen',
-        rejected_col='rejected',
-        drop_cols=['metadata', 'id']
-    )
-
-    print("Original shape:", df.shape)
-    print("\nTransformed shape:", transformed_df.shape)
-    print("\nTransformation sample:")
-    print(transformed_df.head())
-    print("\nLabel distribution:")
-    print(transformed_df['label'].value_counts())
-
-if __name__ == "__main__":
-    test_example()
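
The deleted pipeline unpivots a pairwise preference dataset (one row per chosen/rejected pair) into long format (one labeled row per response). The same reshape can be written with pandas.melt; this is a minimal sketch, not the repo's code, and the random_state seed is an assumption (the original shuffle was unseeded, so its row order was non-deterministic):

import pandas as pd

def melt_preferences(df: pd.DataFrame) -> pd.DataFrame:
    # Reshape wide chosen/rejected columns into long text/label rows.
    long_df = df.melt(
        id_vars=[c for c in df.columns if c not in ('chosen', 'rejected')],
        value_vars=['chosen', 'rejected'],
        var_name='label',
        value_name='text',
    )
    # Map the source column names onto the labels the deleted pipeline used.
    long_df['label'] = long_df['label'].map({'chosen': 'liked', 'rejected': 'disliked'})
    # random_state is an assumption added for reproducibility.
    return long_df.dropna(subset=['text']).sample(frac=1, random_state=42).reset_index(drop=True)

melt avoids building and concatenating two intermediate frames; the result matches the deleted function up to row order.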
ml/eval/reward_eval.py CHANGED
@@ -3,10 +3,8 @@ import os
 from typing import Any, Dict, List
 import json
 import torch
-import transformers
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
+from transformers import AutoModelForCausalLM, AutoTokenizer, CohereConfig, AutoModel
 from accelerate import Accelerator
-from trl import KTOConfig, KTOTrainer, ModelConfig, get_peft_config, maybe_unpair_preference_dataset, setup_chat_format
 from tqdm import tqdm
 
 # Add script directory to system path for importing local modules
@@ -24,7 +22,7 @@ def create_model(model_name: str):
     """
     loads pre-trained reward model and moves it onto device
     """
-    model = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", num_labels=1).to("cuda")
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", num_labels=1).to("cuda")
     return model
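
One thing worth noting about this hunk: num_labels configures a classification head, so it takes effect with AutoModelForSequenceClassification; AutoModelForCausalLM keeps its language-modeling head, and the argument adds no scalar reward head there. A minimal sketch of the sequence-classification variant, with an assumed fallback for machines without the flash-attn package (the create_reward_model name and the fallback are illustrative, not from this repo):

import torch
from transformers import AutoModelForSequenceClassification

def create_reward_model(model_name: str, device: str = "cuda"):
    # num_labels=1 gives a scalar value head, which is what a reward model scores with.
    try:
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            attn_implementation="flash_attention_2",  # requires the flash-attn package
            num_labels=1,
        )
    except (ImportError, ValueError):
        # Assumed fallback: default attention kernels if flash-attn is unavailable.
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name, torch_dtype=torch.bfloat16, num_labels=1
        )
    return model.to(device)

If scoring via the causal-LM head is intended instead, dropping num_labels=1 would make that intent clearer.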
@@ -91,10 +89,10 @@ def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[
     """
     Main function for processing evaluation, takes model name as input.
     """
-    mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
-
+    # mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
+
     # Initialize accelerator and model
-    accelerator = MyAccelerator(mixed_precision)
+    # accelerator = MyAccelerator(mixed_precision)
     model = create_model(model_name)
     tokenizer = create_tokenizer(model_name)
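
Commenting out MyAccelerator removes mixed-precision handling entirely. If that behavior is still wanted without the local wrapper, the stock accelerate API accepts the same flag; a minimal sketch mirroring the commented-out lines, assuming args.bfloat16 is the same flag used above and create_model is the function from this file:

from accelerate import Accelerator

# Same precision selection as the commented-out code, via stock accelerate.
accelerator = Accelerator(mixed_precision='bf16' if args.bfloat16 else 'fp16')
model = accelerator.prepare(create_model(model_name))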
@@ -111,15 +109,15 @@ def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[
 
 # ONLY FOR TESTING:
 if __name__ == '__main__':
-    args = {
-        'bfloat16': False,
-        'reward_output_fmt': '1-0',
-        'apply_sigmoid_to_reward': False,
-        'per_device_batch_size': 8,
-        'output_filepath': '/path/to/your/data.json',
-        'result_filename': None,
-    }
+    args = EvalArguments(bfloat16=True,
+                         reward_output_fmt='1-0',
+                         apply_sigmoid_to_reward=False,
+                         per_device_batch_size=8,
+                         output_filepath='/path/to/your/data.json',
+                         result_filename=None,
+                         model_name_or_path="CohereForAI/aya-expanse-8b")
+
 
     eval_data_list_dict = [{"prompt": "How are you?", "output": "I'm doing great!"}, {"prompt": "What's your name?", "output": "Assistant"}]
 
-    process_evaluation(args, model_name="CohereForAI/aya-23-8B", eval_data_list_dict=eval_data_list_dict)
+    process_evaluation(args, model_name="CohereForAI/aya-expanse-8b", eval_data_list_dict=eval_data_list_dict)
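
EvalArguments is constructed here but defined in a local module that is not part of this diff. A minimal sketch of what the call site implies, assuming a plain dataclass; the real class may define more fields and different defaults:

from dataclasses import dataclass
from typing import Optional

@dataclass
class EvalArguments:
    # Fields inferred from the call site in reward_eval.py's __main__ block.
    bfloat16: bool = True
    reward_output_fmt: str = '1-0'
    apply_sigmoid_to_reward: bool = False
    per_device_batch_size: int = 8
    output_filepath: str = '/path/to/your/data.json'
    result_filename: Optional[str] = None
    model_name_or_path: str = 'CohereForAI/aya-expanse-8b'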
 
ml/eval/tempCodeRunnerFile.py ADDED
File without changes