Spaces: Running on Zero

Riddhi Bhagwat committed
Commit · 1893204
Parent(s): 67312ac

organization of files & debugging reward_eval file

Browse files:
- ml/eval/data_transform_pipeline.py +0 -80
- ml/eval/reward_eval.py +14 -16
- ml/eval/tempCodeRunnerFile.py +0 -0
ml/eval/data_transform_pipeline.py
DELETED
@@ -1,80 +0,0 @@
-import pandas as pd
-import numpy as np
-
-# NOTE: names of preset cols may be different based on dataset, this is just a generalized pipeline
-
-CHOSEN_COLUMN = 'chosen'  # name of col with chosen responses
-REJECTED_COLUMN = 'rejected'  # name of col with rejected responses
-COLUMNS_TO_DROP = ['metadata', 'timestamp', 'id']  # cols to remove
-
-def transform_rlhf_dataset(df, chosen_col=CHOSEN_COLUMN, rejected_col=REJECTED_COLUMN, drop_cols=COLUMNS_TO_DROP):
-    """
-    Parameters:
-        df (pandas.DataFrame): Input dataframe with chosen and rejected columns
-        chosen_col (str): Name of column containing chosen responses
-        rejected_col (str): Name of column containing rejected responses
-        drop_cols (list): List of column names to drop from the dataset
-
-    Returns:
-        pandas.DataFrame: Transformed dataset with 'text' and 'label' columns
-    """
-    df = df.copy()
-
-    existing_cols_to_drop = [col for col in drop_cols if col in df.columns]
-    if existing_cols_to_drop:
-        df = df.drop(columns=existing_cols_to_drop)
-
-    preserved_cols = [col for col in df.columns if col not in [chosen_col, rejected_col]]
-
-    # two separate dataframes for liked and disliked
-    liked_df = df[[chosen_col]].copy()
-    liked_df.columns = ['text']
-    liked_df['label'] = 'liked'
-
-    disliked_df = df[[rejected_col]].copy()
-    disliked_df.columns = ['text']
-    disliked_df['label'] = 'disliked'
-
-    for col in preserved_cols:
-        liked_df[col] = df[col]
-    for col in preserved_cols:
-        disliked_df[col] = df[col]
-
-    # combine + shuffle
-    transformed_df = pd.concat([liked_df, disliked_df], ignore_index=True)
-    transformed_df = transformed_df.dropna(subset=['text'])
-    transformed_df = transformed_df.sample(frac=1).reset_index(drop=True)
-
-    # reordering
-    column_order = ['text', 'label'] + preserved_cols
-    transformed_df = transformed_df[column_order]
-
-    return transformed_df
-
-def test_example():
-    example_data = {
-        'chosen': ['This is a good response', 'Another good one'],
-        'rejected': ['This is a bad response', 'Another bad one'],
-        'metadata': ['meta1', 'meta2'],
-        'timestamp': ['2024-01-01', '2024-01-02'],
-        'id': [1, 2]
-    }
-
-    df = pd.DataFrame(example_data)
-    transformed_df = transform_rlhf_dataset(
-        df,
-        chosen_col='chosen',
-        rejected_col='rejected',
-        drop_cols=['metadata', 'id']
-    )
-
-    print("Original shape:", df.shape)
-    print("\nTransformed shape:", transformed_df.shape)
-    print("\nTransformation sample:")
-    print(transformed_df.head())
-    print("\nLabel distribution:")
-    print(transformed_df['label'].value_counts())
-
-if __name__ == "__main__":
-    test_example()
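The deleted helper above is self-contained, so its unpairing step is easy to recreate elsewhere. As a minimal sketch of equivalent behavior (not part of this commit), pandas.melt collapses the chosen/rejected pair into the same long text/label format; the id column here stands in for whatever metadata the caller preserves:

import pandas as pd

# Minimal sketch: unpair a preference dataset into (text, label) rows,
# equivalent in spirit to the deleted transform_rlhf_dataset().
df = pd.DataFrame({
    'chosen': ['This is a good response', 'Another good one'],
    'rejected': ['This is a bad response', 'Another bad one'],
    'id': [1, 2],
})

long_df = df.melt(
    id_vars=['id'],                     # columns to preserve on every row
    value_vars=['chosen', 'rejected'],  # paired response columns
    var_name='label',
    value_name='text',
)
# map source column names to the labels the deleted pipeline used
long_df['label'] = long_df['label'].map({'chosen': 'liked', 'rejected': 'disliked'})
long_df = long_df.dropna(subset=['text']).sample(frac=1).reset_index(drop=True)
print(long_df[['text', 'label', 'id']])

melt carries the id_vars columns onto every output row, which matches the preserved_cols copy loop in the deleted function.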
ml/eval/reward_eval.py
CHANGED
@@ -3,10 +3,8 @@ import os
 from typing import Any, Dict, List
 import json
 import torch
-import
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
+from transformers import AutoModelForCausalLM, AutoTokenizer, CohereConfig, AutoModel
 from accelerate import Accelerator
-from trl import KTOConfig, KTOTrainer, ModelConfig, get_peft_config, maybe_unpair_preference_dataset, setup_chat_format
 from tqdm import tqdm
 
 # Add script directory to system path for importing local modules
@@ -24,7 +22,7 @@ def create_model(model_name: str):
     """
     loads pre-trained reward model and moves it onto device
     """
-    model =
+    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2", num_labels=1).to("cuda")
     return model
 
 
@@ -91,10 +89,10 @@ def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[str, Any]]:
     """
     Main function for processing evaluation, takes model name as input.
     """
-    mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
-
+    # mixed_precision = 'bf16' if args.bfloat16 else 'fp16'
+
     # Initialize accelerator and model
-    accelerator = MyAccelerator(mixed_precision)
+    # accelerator = MyAccelerator(mixed_precision)
     model = create_model(model_name)
     tokenizer = create_tokenizer(model_name)
 
@@ -111,15 +109,15 @@ def process_evaluation(args, model_name: str, eval_data_list_dict) -> List[Dict[str, Any]]:
 
 # ONLY FOR TESTING:
 if __name__ == '__main__':
-    args =
-
-
-
-
-
-
-
+    args = EvalArguments(bfloat16=True,
+                         reward_output_fmt='1-0',
+                         apply_sigmoid_to_reward=False,
+                         per_device_batch_size=8,
+                         output_filepath='/path/to/your/data.json',
+                         result_filename=None,
+                         model_name_or_path="CohereForAI/aya-expanse-8b")
+
 
     eval_data_list_dict = [{"prompt": "How are you?", "output": "I'm doing great!"}, {"prompt": "What's your name?", "output": "Assistant"}]
 
-    process_evaluation(args, model_name="CohereForAI/aya-
+    process_evaluation(args, model_name="CohereForAI/aya-expanse-8b", eval_data_list_dict=eval_data_list_dict)
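One caveat worth flagging on the new create_model(): num_labels configures a classification head, and AutoModelForCausalLM does not attach one, so the committed call still returns vocabulary logits rather than a scalar reward. If a scalar reward head is the intent, a sequence-classification variant along the following lines is the usual pattern. This is a hedged sketch, not what the commit does: it assumes the checkpoint's architecture registers a *ForSequenceClassification class (the Cohere family may not), and create_reward_model / score are illustrative names, not from the diff:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

def create_reward_model(model_name: str = "CohereForAI/aya-expanse-8b"):
    # Assumption: the architecture provides a sequence-classification head;
    # if it does not, a value head on the base model is the usual fallback.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=1,               # one logit per sequence, used as the reward
        torch_dtype=torch.bfloat16,
    )
    return model.to("cuda" if torch.cuda.is_available() else "cpu")

def score(model, tokenizer, prompt: str, output: str) -> float:
    # Hypothetical helper: scalar reward for a single (prompt, output) pair.
    inputs = tokenizer(prompt + output, return_tensors="pt").to(model.device)
    with torch.no_grad():
        logits = model(**inputs).logits  # shape (1, 1)
    return logits.squeeze().item()

# usage sketch:
# tokenizer = AutoTokenizer.from_pretrained("CohereForAI/aya-expanse-8b")
# model = create_reward_model()
# print(score(model, tokenizer, "How are you?", "I'm doing great!"))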
ml/eval/tempCodeRunnerFile.py
ADDED
File without changes