In [1]:
!pip install -q bitsandbytes
!pip install -q transformers
!pip install -q accelerate
!pip install -q datasets
!pip install -q peft

In [None]:
from datasets import load_dataset

# Single source of truth for the corpus location (used by both splits).
DATA_FILE = "/kaggle/input/light-novel/Light Novels main.txt"

# Split slices are half-open: "train[:2000]" covers rows 0..1999, so the
# held-out slice must start at 2000 (starting at 2001 silently drops a row).
train_dataset = load_dataset("text", data_files=DATA_FILE, split="train[:2000]")
test_dataset = load_dataset("text", data_files=DATA_FILE, split="train[2000:2500]")
print(train_dataset)
print(test_dataset)

In [None]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

# Model and optimizer state dicts share the same settings:
# offload to CPU, and not restricted to rank 0.
_state_dict_options = dict(offload_to_cpu=True, rank0_only=False)
_model_state_config = FullStateDictConfig(**_state_dict_options)
_optim_state_config = FullOptimStateDictConfig(**_state_dict_options)

fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=_model_state_config,
    optim_state_dict_config=_optim_state_config,
)

# The accelerator built here is used later to prepare the PEFT model.
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [None]:


# Hub identifier of the checkpoint that gets quantized and fine-tuned.
base_model_id = "mistralai/Mistral-7B-v0.1"

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
_quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
bnb_config = BitsAndBytesConfig(**_quantization_options)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
)

In [None]:
def formatting_func(example):
    """Build the training prompt string for one dataset example.

    Args:
        example: Either a raw string, or a row mapping as passed by
            ``Dataset.map``. For mappings, the line is read from the
            ``"text"`` key (the column the ``text`` loader is expected to
            produce); a mapping without that key is formatted as-is.

    Returns:
        The example text prefixed with ``"### "``.
    """
    from collections.abc import Mapping

    # Formatting the whole row would embed the dict repr
    # ("### {'text': '...'}") into the training data, so unwrap it first.
    if isinstance(example, Mapping) and "text" in example:
        example = example["text"]
    return f"### {example}"

In [None]:
# Tokenizer used for training examples: requests an EOS token on every
# sequence, left-side padding, and a 512-token model max length.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_eos_token=True,
    padding_side="left",
    model_max_length=512,
)

# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
max_length = 512  # Length chosen by the author as appropriate for this dataset


def generate_and_tokenize_prompt2(prompt):
    """Tokenize one example into a fixed-length training record.

    The example is first passed through ``formatting_func``, then tokenized
    with truncation and padding to ``max_length``. The ``labels`` entry is a
    copy of ``input_ids``.

    Args:
        prompt: One dataset example, forwarded unchanged to ``formatting_func``.

    Returns:
        The tokenizer output with an added ``"labels"`` entry.
    """
    formatted = formatting_func(prompt)
    encoded = tokenizer(
        formatted,
        padding="max_length",
        max_length=max_length,
        truncation=True,
    )
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

In [None]:
# Apply the same per-example tokenization to both splits so they share
# formatting, truncation and padding.
tokenized_train_dataset = train_dataset.map(function=generate_and_tokenize_prompt2)
tokenized_test_dataset = test_dataset.map(function=generate_and_tokenize_prompt2)

In [None]:
# Free-form prompt used by the next cell to sample a completion from the
# model before any LoRA adapters are attached.
eval_prompt = "Mathis was a skater boy, she said 'see you later, boy'"

In [None]:
# Separate tokenizer for inference: unlike the training tokenizer, no padding
# side or EOS-appending option is requested here, only the BOS token.
eval_tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True)

# Encode the prompt as PyTorch tensors and move them to the GPU.
model_input = eval_tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    generated_ids = model.generate(
        **model_input,
        max_new_tokens=256,
        repetition_penalty=1.15,
    )
    # Only the first returned sequence is decoded and shown.
    completion = eval_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(completion)

In [None]:
from peft import prepare_model_for_kbit_training

# Enable gradient checkpointing on the quantized model, then pass it through
# PEFT's k-bit preparation helper. `model` is rebound to the helper's return
# value, which is what the LoRA cell below wraps.
# NOTE(review): the helper may already enable gradient checkpointing itself,
# which would make the explicit call redundant - confirm against the PEFT docs.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [None]:
def print_trainable_parameters(model):
    """
    Print how many of the model's parameters require gradients.

    Walks ``model.named_parameters()`` once, summing ``numel()`` over all
    parameters and over those with ``requires_grad`` set, then prints both
    totals and the trainable percentage on a single line.
    """
    sizes = [(p.numel(), p.requires_grad) for _, p in model.named_parameters()]
    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, needs_grad in sizes if needs_grad)
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [None]:
# Print the model's module tree for inspection.
# NOTE(review): presumably used to look up the layer names listed in the LoRA
# `target_modules` in the next cell - confirm.
print(model)

In [None]:
from peft import LoraConfig, get_peft_model

# Module names that receive LoRA adapters.
_lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "lm_head",
]

# Rank-32 adapters with alpha 64, 5% dropout, and no bias training.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    target_modules=_lora_target_modules,
)

# Wrap the prepared model with the adapters and report the trainable share.
model = get_peft_model(model, config)
print_trainable_parameters(model)

In [None]:
# When more than one GPU is visible, set both parallelism flags on the model.
if torch.cuda.device_count() > 1:
    model.is_parallelizable = model.model_parallel = True

# Hand the (possibly flagged) model to the accelerator created earlier.
model = accelerator.prepare_model(model)

In [None]:
import transformers
from datetime import datetime

# Run naming: "<base model>-<project>", also used as the output directory.
project = "flo-finetune"
base_model_name = "mistral"
run_name = f"{base_model_name}-{project}"
output_dir = f"./{run_name}"

training_args = transformers.TrainingArguments(
    output_dir=output_dir,
    # Optimisation schedule: 500 steps total, 1 warmup step.
    max_steps=500,
    warmup_steps=1,
    learning_rate=2.5e-5,
    optim="paged_adamw_8bit",
    # Batch of 2 per device, no accumulation, gradient checkpointing on.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    # Logging, evaluation and checkpointing all use a 25-step interval.
    logging_steps=25,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=25,
    evaluation_strategy="steps",
    eval_steps=25,
    do_eval=True,
    # report_to="wandb",  # left disabled; enable to report to Weights & Biases
    # Timestamped run name (minute resolution).
    run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
)

# Causal-LM collation (masked-LM objective disabled).
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_test_dataset,
    data_collator=causal_lm_collator,
)

# The KV cache is switched off for training to silence warnings;
# re-enable it before running inference.
model.config.use_cache = False
trainer.train()

## Test en local de notre modèle

In [None]:
!pip install -q bitsandbytes
!pip install -q transformers
!pip install -q accelerate
!pip install -q datasets
!pip install -q peft

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, AutoModel

2024-02-16 15:06:50.797593: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-16 15:06:50.797725: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-16 15:06:50.934033: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


KeyboardInterrupt: 

In [9]:
# Checkpoint to load for local testing; swap in the commented id to load the
# uploaded fine-tuned repo instead.
base_model_id = "mistralai/Mistral-7B-v0.1"
#base_model_id = "FloVolo/mistral-flo-finetune-2-T4"

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
_quantization_options = {
    "load_in_4bit": True,
    "bnb_4bit_use_double_quant": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": torch.bfloat16,
}
bnb_config = BitsAndBytesConfig(**_quantization_options)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
)

# Inference tokenizer: only the BOS option is requested.
tokenizer = AutoTokenizer.from_pretrained(base_model_id, add_bos_token=True)

# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

ImportError: Using `load_in_8bit=True` requires Accelerate: `pip install accelerate` and the latest version of bitsandbytes `pip install -i https://test.pypi.org/simple/ bitsandbytes` or `pip install bitsandbytes`.

In [13]:
# Generation runs on the CUDA device.
device = torch.device('cuda')

eval_prompt = "Here's the list of why I'm a huge dumbass :"

# Encode the prompt as PyTorch tensors on the target device.
model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)

model.eval()
with torch.no_grad():
    generated_ids = model.generate(
        **model_input,
        max_new_tokens=256,
        repetition_penalty=1.15,
    )
    # Only the first returned sequence is decoded and shown.
    completion = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print(completion)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Here's the list of why I'm a huge dumbass :

1. I didn't know that you can't use a 3rd party app to get your phone number from Facebook, so I used one and got my account banned for 2 weeks.

2. I was trying to find out how to make an app on Android Studio, but I couldn't figure it out because I don't have any idea about coding or anything like that.

3. I tried to download a game called "Pokemon Go" but I couldn't find it in the Play Store, so I went to the website and downloaded it there. It turned out to be a virus.

4. I wanted to watch some videos on YouTube, but I couldn't find them because they were blocked by my school's firewall. So I decided to go to another site where I could watch them without being blocked. But when I clicked on the link, it took me to a page with ads all over it! And then it said "You must be logged into Google Chrome before continuing." So now I'm stuck here forever...

5. I had this really cool idea for a new app called "The Best App Ever!" but I forgot 

In [14]:
from huggingface_hub import notebook_login
# Shows the Hugging Face login widget. It needs manual input, so this cell
# cannot complete unattended; the upload cell below comes after it.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [15]:
# Upload `model` to the Hub repository named below.
# NOTE(review): in this section `model` is only assigned from the base
# checkpoint, yet the recorded output shows an adapter file being uploaded -
# presumably the cell was run while the fine-tuned PEFT model was still in the
# kernel. Confirm which object is meant before re-running from a fresh kernel.
model.push_to_hub('FloVolo/mistral-flo-finetune-2-T4')

README.md:   0%|          | 0.00/1.23k [00:00<?, ?B/s]



adapter_model.safetensors:   0%|          | 0.00/600M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/FloVolo/mistral-flo-finetune-2-T4/commit/39736c38b46ed3ca5873b35c54934424de4a443a', commit_message='Upload MistralForCausalLM', commit_description='', oid='39736c38b46ed3ca5873b35c54934424de4a443a', pr_url=None, pr_revision=None, pr_num=None)