In [None]:
!pip install -U bitsandbytes transformers peft accelerate trl datasets sentencepiece wandb
!pip install flash-attn --no-build-isolation

In [None]:
# Hub id of the base checkpoint that is loaded and fine-tuned below.
MODEL_NAME = "CohereForAI/aya-23-8b"

# The settings below should be tuned to the GPU you are running on.

# Preset for a free T4 instance (uncomment these and comment out the A100 preset):
# QUANTIZE_4BIT = True
# USE_GRAD_CHECKPOINTING = True
# TRAIN_BATCH_SIZE = 2
# TRAIN_MAX_SEQ_LENGTH = 512
# USE_FLASH_ATTENTION = False
# GRAD_ACC_STEPS = 16

# Preset for an A100 (active). Batch size x accumulation steps is 32,
# the same product as in the T4 preset.
TRAIN_MAX_SEQ_LENGTH = 512
TRAIN_BATCH_SIZE = 16
GRAD_ACC_STEPS = 2
QUANTIZE_4BIT = True            # load the model through the 4-bit BitsAndBytesConfig
USE_GRAD_CHECKPOINTING = True   # forwarded to TrainingArguments.gradient_checkpointing
USE_FLASH_ATTENTION = True      # selects attn_implementation="flash_attention_2"

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os,torch
import bitsandbytes as bnb
from datasets import load_dataset
from trl import SFTTrainer
from datasets import Dataset
import pyarrow as pa
import pyarrow.dataset as ds
import pandas as pd
import re
import wandb

In [None]:
# Load the base model

# NF4 4-bit quantization with double quantization and bfloat16 compute,
# or no quantization config at all when QUANTIZE_4BIT is off.
quantization_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    if QUANTIZE_4BIT
    else None
)

# None means no explicit attention implementation is requested.
attn_implementation = "flash_attention_2" if USE_FLASH_ATTENTION else None

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation=attn_implementation,
    quantization_config=quantization_config,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# Load tokenizer
# Fetch the tokenizer published under MODEL_NAME. generate_aya_23 reads this
# module-level `tokenizer` both to apply the chat template and to decode outputs.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
def get_message_format(prompts):
  """Wrap each prompt string in a single-turn chat conversation.

  Args:
    prompts: iterable of user prompt strings.

  Returns:
    A list with one conversation per prompt; each conversation is a
    one-element list holding a {"role": "user", "content": <prompt>} dict.
  """
  return [[{"role": "user", "content": text}] for text in prompts]

def generate_aya_23(
      prompts,
      model,
      temperature=0.3,
      top_p=0.75,
      top_k=0,
      max_new_tokens=1024
    ):
  """Sample one completion per prompt from `model`.

  Each prompt is wrapped as a single user turn, rendered with the chat
  template of the module-level `tokenizer`, and generated as one padded batch.

  Args:
    prompts: sequence of user prompt strings.
    model: causal LM exposing `.device` and `.generate(...)`.
    temperature: sampling temperature passed to `generate`.
    top_p: nucleus-sampling threshold passed to `generate`.
    top_k: top-k cutoff passed to `generate`.
    max_new_tokens: maximum number of tokens generated per prompt.

  Returns:
    A list of decoded completions (special tokens stripped), in prompt order.
    An empty `prompts` yields an empty list.
  """
  # An empty batch has nothing to tokenize; indexing the first row would fail.
  if len(prompts) == 0:
    return []

  messages = get_message_format(prompts)

  # return_dict=True also returns the attention mask, so padded positions are
  # masked explicitly instead of being inferred by generate().
  inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        padding=True,
        return_tensors="pt",
        return_dict=True,
      )
  inputs = inputs.to(model.device)
  prompt_padded_len = inputs["input_ids"].shape[1]

  gen_tokens = model.generate(
        **inputs,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
        do_sample=True,
      )

  # get only generated tokens
  # NOTE(review): slicing every row at the padded prompt length assumes the
  # tokenizer pads on the left — confirm for this tokenizer.
  gen_tokens = gen_tokens[:, prompt_padded_len:]

  return tokenizer.batch_decode(gen_tokens, skip_special_tokens=True)

In [None]:
# Test generations on languages in the Aya 23 set
prompts = [
    "Write a list of three fruits and tell me about each of them", # English
    "Viết danh sách ba loại trái cây và kể cho tôi nghe về từng loại trái cây đó", # Vietnamese
    "3 つの果物のリストを書いて、それぞれについて教えてください", # Japanese
    "Üç meyveden oluşan bir liste yazın ve bana her birini anlatın" # Turkish
]

generations = generate_aya_23(prompts, model)

# Print each prompt with its completion, followed by blank lines as a separator.
for prompt_text, response_text in zip(prompts, generations):
  print(f"PROMPT\n{prompt_text}\nRESPONSE\n{response_text}\n\n")

PROMPT
Write a list of three fruits and tell me about each of them
RESPONSE
Sure! Here is a list of three fruits, along with some information about each of them:

1. Apple: Apples are a popular fruit that are widely cultivated across the world. They are typically round or oval in shape and come in a variety of colors, including red, green, yellow, and a blend of these colors. Apples are known for their crisp texture and sweet or tart taste. They are a good source of dietary fiber, vitamins, and antioxidants.

2. Banana: Bananas are long, curved fruits that come in a range of colors, from yellow to brown. They are a good source of potassium, vitamins, and fiber. Bananas have a sweet taste and are often eaten raw, but they can also be used in baking or blended into smoothies.

3. Orange: Oranges are citrus fruits known for their vibrant orange color and sweet, tangy taste. They are a good source of vitamin C and other nutrients. Oranges can be eaten fresh, juiced, or used in various dish

In [None]:
# Test Bengali (not in Aya 23 set) inference on base model

prompts = [
  'Translate from English to Bengali: "Rates are competitive, almost always the best in the market"'
]

generations = generate_aya_23(prompts, model)

# Print each prompt with its completion, followed by blank lines as a separator.
for prompt_text, response_text in zip(prompts, generations):
  print(f"PROMPT\n{prompt_text}\nRESPONSE\n{response_text}\n\n")

PROMPT
Translate from English to Bengali: "Rates are competitive, almost always the best in the market"
RESPONSE
"পরিণতি সংসাধানকরি, বাজারের সম্পর্কে সম্প্রতি সবচেয়ে বেশি"




In [None]:
# Load an English to Bengali translation dataset from Aya Collection:
# take the train split of the "templated_indic_sentiment" subset and keep
# only the rows whose language code is 'ben'.
dataset = (
    load_dataset("CohereForAI/aya_collection", "templated_indic_sentiment")["train"]
    .filter(lambda row: row["language"] == "ben")
)

def formatting_prompts_func(example):
    """Render a batch of input/target pairs as chat-formatted training strings.

    Args:
        example: mapping with parallel sequences under 'inputs' and 'targets'.

    Returns:
        One string per entry in example['inputs']: the input wrapped as a user
        turn, followed by the opening of a chatbot turn and the target text.
    """
    return [
        f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{user_text}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{example['targets'][idx]}"
        for idx, user_text in enumerate(example['inputs'])
    ]

In [None]:
# LoRA adapter settings: rank 32 / alpha 32 on the four attention projections.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=32,
    bias="none",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
)

# Training Arguments
training_arguments = TrainingArguments(
    # Output and logging
    output_dir="results",
    save_steps=50,
    logging_steps=10,
    report_to="none",
    # Batch shape and memory (values come from the config cell)
    per_device_train_batch_size=TRAIN_BATCH_SIZE,
    gradient_accumulation_steps=GRAD_ACC_STEPS,
    gradient_checkpointing=USE_GRAD_CHECKPOINTING,
    group_by_length=True,
    # Precision
    bf16=True,
    fp16=False,
    # Optimization
    num_train_epochs=20,
    optim="paged_adamw_32bit",
    learning_rate=1e-3,
    weight_decay=0.001,
    lr_scheduler_type="constant",
    # NOTE(review): a plain "constant" schedule may ignore warmup_ratio;
    # confirm whether "constant_with_warmup" was intended.
    warmup_ratio=0.05,
)

# NOTE(review): the install cell is unpinned; confirm that the installed TRL
# release still accepts max_seq_length= and tokenizer= on SFTTrainer.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    formatting_func=formatting_prompts_func,
    tokenizer=tokenizer,
    max_seq_length=TRAIN_MAX_SEQ_LENGTH,
    peft_config=peft_config,
)



In [None]:
# Start fine-tuning with the SFTTrainer built in the previous cell.
# Its TrainingArguments use output_dir="results" with save_steps=50 and logging_steps=10.
trainer.train()

In [None]:
# Save the model to disk
# The trained model is written to the 'aya-qlora' directory; the inference cell
# below reads that directory back with load_adapter("aya-qlora").
trainer.model.save_pretrained(save_directory='aya-qlora')
# Enable the generation cache on the in-memory model.
# NOTE(review): presumably it was turned off while training with gradient checkpointing — confirm.
model.config.use_cache = True
# Switch the in-memory model to evaluation mode before any further inference.
model.eval()

In [None]:
# Test Bengali inference on loaded fine-tuned model

# Load Model and LoRA Adapter
# Same quantization and attention settings as the base-model load cell.
quantization_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    if QUANTIZE_4BIT
    else None
)

attn_implementation = "flash_attention_2" if USE_FLASH_ATTENTION else None

# Load a fresh copy of the base checkpoint, then attach the adapter saved in 'aya-qlora'.
loaded_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation=attn_implementation,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
loaded_model.load_adapter("aya-qlora")


prompts = [
  'Translate from English to Bengali: "Rates are competitive, almost always the best in the market"'
]

generations = generate_aya_23(prompts, loaded_model)

# Print each prompt with its completion, followed by blank lines as a separator.
for prompt_text, response_text in zip(prompts, generations):
  print(f"PROMPT\n{prompt_text}\nRESPONSE\n{response_text}\n\n")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


PROMPT
Translate from English to Bengali: "Rates are competitive, almost always the best in the market"
RESPONSE
"দরগুলি প্রতিযোগিতামূলক, প্রায় সবসময় বাজারে সেরা"


