In [1]:
from dataset import GOOGLE, GOODBYE, CHAT, VISION, GITHUB
# Merge the five per-command example collections imported from `dataset`
# into a single dataset. Later cells read each entry as item[0] (the prompt
# text) and item[1] (a label name such as 'google' or 'chat').
data = GOOGLE + GOODBYE + CHAT + VISION + GITHUB

# Sanity check: total number of examples across all categories.
print(len(data))
    


757


In [5]:
# Install Hugging Face Transformers with its PyTorch extra, then upgrade
# accelerate. pip's output notes that the kernel may need to be restarted
# before the updated packages can be used.
# NOTE(review): versions are not pinned, so a fresh run may pull releases
# newer than the ones this notebook was executed with.
%pip install transformers[torch]
%pip install accelerate -U

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, set_seed

class CustomDataset(Dataset):
    """Dataset that pairs pre-computed encodings with their labels.

    Args:
        encodings: Mapping from field name to an indexable collection holding
            one entry per example (here, the output of the tokenizer).
        labels: Indexable collection with one label per example.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        """Return the number of examples, taken from the label collection."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return example `idx` as a dict of tensors.

        The dict holds one tensor per encoding field plus a 'labels' tensor.
        """
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Seed Python, NumPy and PyTorch before the model is built. The
# classification head is newly initialised by from_pretrained (see the
# "weights ... were not initialized" warning), so without a seed every run
# starts from different weights and the results are not reproducible.
SEED = 42
set_seed(SEED)

# Label name -> class index. The model's output size is derived from this
# mapping so the two cannot drift apart.
label_map = {'vision': 0, 'chat': 1, 'goodbye': 2, 'google': 3, 'github': 4}

# Load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
model = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label_map),
)

# Prepare the dataset: item[0] is the prompt text, item[1] the label name.
# An unknown label name raises KeyError here rather than being silently dropped.
texts = [item[0] for item in data]
labels = [label_map[item[1]] for item in data]

# Split the dataset into training and validation sets (80/20, fixed seed)
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=SEED
)

# Tokenize the text; each split is padded to its own longest sequence
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)

# Create the custom dataset
train_dataset = CustomDataset(train_encodings, train_labels)
val_dataset = CustomDataset(val_encodings, val_labels)

# Create the Trainer
training_args = TrainingArguments(
    output_dir='../models',
    num_train_epochs=10,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    learning_rate=5e-5,
    seed=SEED,
    save_total_limit=1,  # Only the most recent checkpoint is kept on disk
    # NOTE(review): newer transformers releases rename this argument to
    # `eval_strategy` -- confirm against the installed version when upgrading.
    evaluation_strategy="epoch",
    save_strategy="epoch",  # Save a checkpoint at the end of each epoch
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Fine-tune the model
trainer.train()


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the mod

{'eval_loss': 0.030141128227114677, 'eval_runtime': 10.8983, 'eval_samples_per_second': 13.947, 'eval_steps_per_second': 1.743, 'epoch': 1.0}


                                                  
 20%|██        | 152/760 [08:26<31:02,  3.06s/it]

{'eval_loss': 0.008563311770558357, 'eval_runtime': 12.2288, 'eval_samples_per_second': 12.43, 'eval_steps_per_second': 1.554, 'epoch': 2.0}


                                                   
 30%|███       | 228/760 [12:35<27:06,  3.06s/it]

{'eval_loss': 0.04210153967142105, 'eval_runtime': 12.9654, 'eval_samples_per_second': 11.724, 'eval_steps_per_second': 1.465, 'epoch': 3.0}


                                                   
 40%|████      | 304/760 [16:51<23:59,  3.16s/it]

{'eval_loss': 0.016036802902817726, 'eval_runtime': 10.8075, 'eval_samples_per_second': 14.064, 'eval_steps_per_second': 1.758, 'epoch': 4.0}


                                                 
 50%|█████     | 380/760 [20:47<17:27,  2.76s/it]

{'eval_loss': 0.01568855531513691, 'eval_runtime': 10.5235, 'eval_samples_per_second': 14.444, 'eval_steps_per_second': 1.805, 'epoch': 5.0}


                                                 
 60%|██████    | 456/760 [25:03<13:46,  2.72s/it]

{'eval_loss': 0.0158185176551342, 'eval_runtime': 10.4724, 'eval_samples_per_second': 14.514, 'eval_steps_per_second': 1.814, 'epoch': 6.0}


 66%|██████▌   | 500/760 [27:32<15:42,  3.63s/it]

{'loss': 0.0954, 'learning_rate': 1.7105263157894737e-05, 'epoch': 6.58}


                                                 
 70%|███████   | 532/760 [29:25<11:00,  2.90s/it]

{'eval_loss': 0.01628260686993599, 'eval_runtime': 10.8747, 'eval_samples_per_second': 13.977, 'eval_steps_per_second': 1.747, 'epoch': 7.0}


                                                 
 80%|████████  | 608/760 [33:35<07:13,  2.85s/it]

{'eval_loss': 0.016459450125694275, 'eval_runtime': 12.6362, 'eval_samples_per_second': 12.029, 'eval_steps_per_second': 1.504, 'epoch': 8.0}


                                                 
 90%|█████████ | 684/760 [38:00<03:55,  3.09s/it]

{'eval_loss': 0.01655273139476776, 'eval_runtime': 11.1512, 'eval_samples_per_second': 13.631, 'eval_steps_per_second': 1.704, 'epoch': 9.0}


                                                 
100%|██████████| 760/760 [42:00<00:00,  2.67s/it]

{'eval_loss': 0.016479341313242912, 'eval_runtime': 10.1493, 'eval_samples_per_second': 14.976, 'eval_steps_per_second': 1.872, 'epoch': 10.0}


100%|██████████| 760/760 [42:01<00:00,  3.32s/it]

{'train_runtime': 2521.8708, 'train_samples_per_second': 2.399, 'train_steps_per_second': 0.301, 'train_loss': 0.06297851167619228, 'epoch': 10.0}





TrainOutput(global_step=760, training_loss=0.06297851167619228, metrics={'train_runtime': 2521.8708, 'train_samples_per_second': 2.399, 'train_steps_per_second': 0.301, 'train_loss': 0.06297851167619228, 'epoch': 10.0})

In [4]:
from transformers import pipeline

# Load the fine-tuned model from the checkpoint saved under the Trainer's
# output directory. Only forward slashes are used: a backslash separator
# resolves only on Windows, and '\c' is an invalid string escape sequence.
model_path = '../models/checkpoint-760'
# The tokenizer is loaded from the base model name rather than from the
# checkpoint directory (presumably because the Trainer was not given a
# tokenizer to save alongside the weights -- verify if the checkpoint changes).
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
classifier = pipeline('text-classification', model=model_path, tokenizer=tokenizer)

def command_filter(prompt):
    """Classify `prompt` and return the name of the matching command.

    The top prediction's label is expected to end in ``_<index>``; the index
    after the last underscore is converted to an int and looked up in a fixed
    index-to-command table.

    Args:
        prompt: Text passed to the module-level `classifier`.

    Returns:
        One of 'vision', 'chat', 'goodbye', 'google' or 'github'.

    Raises:
        ValueError: If the label suffix is not an integer.
        KeyError: If the index is outside 0-4.
    """
    top_prediction = classifier(prompt)[0]
    # Keep only the text after the last underscore of the label
    _, _, index_text = top_prediction['label'].rpartition('_')
    id_to_command = {0: 'vision', 1: 'chat', 2: 'goodbye', 3: 'google', 4: 'github'}
    return id_to_command[int(index_text)]
    
# Example prompts: a handful of inputs to spot-check the classifier
prompts = [
    "Hello there!",
    "I'd like you to tell me about powerlifting",
    "Can you see me?",
    "What do you see in this image?",
    "See you tomorrow!",
    "Goodbye GPT",
    "What is a compiled programing language?",
    "How many calories does Ultra White Monster Energy have?",
    "Let's create a new project",
    "I want to open a new repo",
]

# Print each prompt next to the command the classifier assigns to it
for prompt in prompts:
    print(f'{prompt} : {command_filter(prompt)}')


Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


Hello there! : chat
I'd like you to tell me about powerlifting : chat
Can you see me? : vision
What do you see in this image? : vision
See you tomorrow! : goodbye
Goodbye GPT : goodbye
What is a compiled programing language? : google
How many calories does Ultra White Monster Energy have? : google
Let's create a new project : github
I want to open a new repo : github
