robkaandorp committed
Commit 3ebfa66 • 1 Parent(s): 9a639d2

Finish training script

Files changed:
- .gitattributes +1 -0
- test_queries.py +69 -0
- train_dataset.py +98 -3
.gitattributes CHANGED
@@ -24,6 +24,7 @@
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+results/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
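The added rule routes everything under results/ through Git LFS. That matches the output_dir='./results' used by the training script below, so the saved checkpoints and the final model are stored as LFS objects rather than regular git blobs.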
test_queries.py ADDED
@@ -0,0 +1,69 @@
+import time
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, DataCollatorForLanguageModeling
+
+if torch.cuda.is_available():
+    print("Cuda is available")
+
+# base_model_id = "microsoft/phi-2"
+# base_model_id = "abacaj/phi-2-super"
+base_model_id = "./results"
+
+tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+    print("pad_token was missing and has been set to eos_token")
+
+tokenizer.chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + 'Instruct: ' + message['content'].strip() + '\n' }}{% elif message['role'] == 'assistant' %}{{ 'Output: ' + message['content'] + eos_token }}{% endif %}{% endfor %}"
+
+model = AutoModelForCausalLM.from_pretrained(base_model_id, attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16).to('cuda')
+print(model)
+
+meta_messages = [
+    [
+        { "role": "user", "content": "You are an AI assistant that will be answering phone calls from participants of the Nowhere event, a regional Burning Man event in Spain. The phone the participant is using will be on-site at or near the Oasis Playground barrio. Your answers will be short and to the point. Conversation with the participant will be solely through voice prompts, with the use of speech-to-text and text-to-speech software. You as the assistant will give your answers the right hippie-like vibe for this type of event." },
+        { "role": "assistant", "content": "Of course, hippie, I will try my best for you!" },
+    ],
+    [ { "role": "user", "content": "Hello, who are you?" } ],
+    [ { "role": "user", "content": "Where are we?" } ],
+    [ { "role": "user", "content": "What can I do here?" } ],
+    [ { "role": "user", "content": "It is so hot I am getting a headache!" } ],
+    [ { "role": "user", "content": "How do I use the toilets?" } ],
+    [ { "role": "user", "content": "What is a Nobody?" } ],
+]
+
+with torch.no_grad():
+    for messages in meta_messages:
+        for msg in messages:
+            print(f"{msg['role']}: {msg['content']}")
+
+        add_generation_prompt = True
+        if len(messages) == 2:
+            add_generation_prompt = False
+
+        inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=add_generation_prompt, return_tensors="pt").to(model.device)
+        input_ids_cutoff = inputs.size(dim=1)
+
+        start_time = time.time()
+
+        generated_ids = model.generate(
+            input_ids=inputs,
+            use_cache=True,
+            max_new_tokens=512,
+            temperature=0.2,
+            top_p=0.95,
+            do_sample=True,
+            eos_token_id=tokenizer.eos_token_id,
+            pad_token_id=tokenizer.pad_token_id,
+        )
+
+        duration = float(time.time() - start_time)
+
+        generated = generated_ids[0][input_ids_cutoff:]
+
+        completion = tokenizer.decode(
+            generated,
+            skip_special_tokens=True,
+        )
+
+        print(f"assistant: {completion} | {len(generated)} tokens, {round(len(generated)/duration, 3)} tokens/sec")
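For reference, a minimal sketch (not part of the commit) of what the chat template above renders for a single user turn. It assumes microsoft/phi-2 as the tokenizer; the exact bos_token string depends on the chosen base model.

# Sketch only: render the commit's chat template to plain text to inspect
# the Instruct/Output framing, instead of returning token tensors.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
tokenizer.chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ bos_token + 'Instruct: ' + message['content'].strip() + '\n' }}{% elif message['role'] == 'assistant' %}{{ 'Output: ' + message['content'] + eos_token }}{% endif %}{% endfor %}"

text = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Where are we?"}],
    tokenize=False,
)
print(repr(text))  # bos_token + 'Instruct: Where are we?\n'

Note that this template has no add_generation_prompt branch, so the flag toggled in the loop does not change the rendered text: the prompt simply ends after the user turn and the model is expected to continue with 'Output: ...' itself.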
train_dataset.py CHANGED
@@ -3,6 +3,12 @@ from langchain_community.embeddings.sentence_transformer import (
 )
 from langchain_community.vectorstores import Chroma
 
+import time
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, DataCollatorForLanguageModeling
+from trl import SFTTrainer
+from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
+
 # create the open-source embedding function
 embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
 
@@ -11,7 +17,96 @@ db = Chroma(embedding_function=embedding_function, persist_directory="./chroma_d
 
 print("There are", db._collection.count(), " docs in the collection")
 
-docs = db._collection.peek(
+docs = db._collection.peek(db._collection.count())
+dataset = docs['documents']
+
+if torch.cuda.is_available():
+    # torch.set_default_device("cuda")
+    print("Cuda is available")
+
+base_model_id = "microsoft/phi-2"
+# base_model_id = "abacaj/phi-2-super"
+# base_model_id = "./results"
+
+tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+    print("pad_token was missing and has been set to eos_token")
+
+# Configuration to load model in 4-bit quantized
+bnb_config = BitsAndBytesConfig(load_in_4bit=True,
+                                bnb_4bit_quant_type='nf4',
+                                #bnb_4bit_compute_dtype='float16',
+                                bnb_4bit_compute_dtype=torch.bfloat16,
+                                bnb_4bit_use_double_quant=False)
+
+model = AutoModelForCausalLM.from_pretrained(base_model_id, attn_implementation="flash_attention_2", quantization_config=bnb_config, torch_dtype="auto")
+print(model)
+
+# Gradient checkpointing to save memory
+model.gradient_checkpointing_enable()
+
+# Freeze base model layers and cast layernorm in fp32
+model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
+
+peft_config = LoraConfig(
+    r=64,
+    lora_alpha=64,
+    target_modules=["q_proj", "k_proj", "v_proj", "dense", "fc2", "fc1"],
+    bias="none",
+    lora_dropout=0.05,
+    task_type="CAUSAL_LM",
+)
+
+training_args = TrainingArguments(
+    output_dir='./results',          # Output directory for checkpoints and predictions
+    overwrite_output_dir=True,       # Overwrite the content of the output directory
+    per_device_train_batch_size=2,   # Batch size for training
+    per_device_eval_batch_size=2,    # Batch size for evaluation
+    gradient_accumulation_steps=5,   # Number of steps before optimizing
+    gradient_checkpointing=True,     # Enable gradient checkpointing
+    gradient_checkpointing_kwargs={"use_reentrant": False},
+    warmup_steps=10,                 # Number of warmup steps
+    #max_steps=1000,                 # Total number of training steps
+    num_train_epochs=20,             # Number of training epochs
+    learning_rate=5e-5,              # Learning rate
+    weight_decay=0.01,               # Weight decay
+    optim="paged_adamw_8bit",        # Keep the optimizer state and quantize it
+    bf16=True,                       # Use mixed precision training
+    # For logging and saving
+    logging_dir='./logs',
+    logging_strategy="epoch",
+    logging_steps=10,
+    save_strategy="epoch",
+    save_steps=10,
+    save_total_limit=2,              # Limit the total number of checkpoints
+    evaluation_strategy="epoch",
+    eval_steps=10,
+    load_best_model_at_end=True,     # Load the best model at the end of training
+    lr_scheduler_type="linear",
+)
+
+def formatting_func(doc):
+    return doc
+
+trainer = SFTTrainer(
+    model=model,
+    train_dataset=dataset,
+    eval_dataset=dataset,
+    peft_config=peft_config,
+    args=training_args,
+    max_seq_length=1024,
+    packing=True,
+    formatting_func=formatting_func
+)
+
+model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
+
+start_time = time.time()  # Record the start time
+trainer.train()
+end_time = time.time()  # Record the end time
+
+training_time = end_time - start_time  # Calculate total training time
 
-
-
+trainer.save_model("./results")
+print(f"Training completed in {training_time} seconds.")
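Because the trainer is built with a peft_config, trainer.save_model("./results") saves a LoRA adapter rather than full model weights. A minimal sketch of an assumed follow-up step, not part of this commit: reattaching that adapter to the base model and merging it for standalone inference.

# Sketch only: load the adapter saved in ./results onto the base model and
# fold the LoRA deltas into the base weights. The output path is hypothetical.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, "./results")  # attach the saved adapter
model = model.merge_and_unload()                      # merge LoRA weights into the base
model.save_pretrained("./results-merged")             # hypothetical output directory

Loading "./results" directly with AutoModelForCausalLM.from_pretrained, as test_queries.py does, should also work when peft is installed, since transformers detects the adapter_config.json and attaches the adapter automatically; merging is the self-contained alternative.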