# Import spaces first to ensure GPU resources are managed correctly
import spaces

# Import necessary libraries
import os
import json
import logging
import time
import torch
import bitsandbytes as bnb  # noqa: F401  (imported so a missing bitsandbytes install fails fast; transformers needs it for 4-bit loading)
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig, get_peft_model

# Configure logging
logging.basicConfig(level=logging.INFO, filename='training_log.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s')
logging.info("Started the script")

# Load the Hugging Face API token from environment variables
HF_API_TOKEN = os.getenv('HF_API_TOKEN')
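# The token is read from the environment (for example a Space secret). os.getenv
# returns None when it is missing, so warn early instead of failing later inside
# from_pretrained.
if HF_API_TOKEN is None:
    logging.warning("HF_API_TOKEN is not set; downloads of gated models may fail")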

# Load the dataset
file_path = 'best_training_data.json'  # Adjust path as needed
logging.info(f"Loading dataset from {file_path}")
try:
    with open(file_path, 'r') as file:
        data = json.load(file)
    logging.info("Dataset loaded successfully")
except Exception as e:
    logging.error(f"Failed to load dataset: {e}")
    raise  # every later step depends on `data`, so stop here rather than cascade into a NameError

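# The code below assumes best_training_data.json is a JSON array of objects that
# each carry a "text" field, e.g. [{"text": "first example"}, {"text": "second example"}].
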
# Convert the dataset to Hugging Face Dataset format
try:
    dataset = Dataset.from_dict({"text": [entry["text"] for entry in data]})
    logging.info("Dataset converted to Hugging Face Dataset format")
except Exception as e:
    logging.error(f"Failed to convert dataset: {e}")
    raise

# Initialize Tokenizer
try:
    tokenizer = AutoTokenizer.from_pretrained("SweatyCrayfish/llama-3-8b-quantized", token=HF_API_TOKEN)
    logging.info("Tokenizer loaded successfully")

    # Add padding token if not already present
    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
        logging.info("Padding token added to the tokenizer")

    tokenizer.save_pretrained('.')
except Exception as e:
    logging.error(f"Failed to load or configure tokenizer: {e}")
    raise

# Tokenize the Dataset. Dataset.map stores plain Python lists and the data
# collator converts them to tensors at batch time, so return_tensors is not
# needed here.
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding='max_length', max_length=1024)

try:
    tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
    logging.info("Dataset tokenized successfully")
except Exception as e:
    logging.error(f"Failed to tokenize the dataset: {e}")
    raise

# Setup Quantization Configuration
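# NF4 ("normal float 4") stores the frozen base weights in 4 bits, double
# quantization compresses the quantization scales as well, and matrix multiplies
# are computed in bfloat16. This keeps the 8B base model small enough to
# fine-tune on a single GPU.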
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the Llama 3 8B model with 4-bit quantization
try:
    model = AutoModelForCausalLM.from_pretrained(
        "SweatyCrayfish/llama-3-8b-quantized",
        quantization_config=nf4_config,
        token=HF_API_TOKEN,
        device_map="auto"
    )

    model.resize_token_embeddings(len(tokenizer))
    model.gradient_checkpointing_enable()
    model.config.use_cache = False  # Disable use_cache when using gradient checkpointing
    logging.info("Model initialized and resized embeddings")

    # Set up LoRA: rank-64 adapters on the attention projections
    lora_config = LoraConfig(
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj']
    )
    # get_peft_model is the documented entry point; it injects the adapters and
    # freezes everything except the LoRA parameters.
    model = get_peft_model(model, lora_config)
    logging.info("LoRA configuration applied to the model")

    # get_peft_model already leaves only the LoRA adapter parameters trainable;
    # re-enabling requires_grad on every floating-point parameter would silently
    # un-freeze the base model, so just log a summary of what will train.
    model.print_trainable_parameters()
    logging.info("Model parameters configured for gradient computation")
except Exception as e:
    logging.error(f"Failed to initialize the model: {e}")
    raise

# Setup Training Arguments
try:
    training_args = TrainingArguments(
        output_dir="training_results",
        evaluation_strategy="no",  # Disable evaluation
        save_strategy="epoch",  # Save only at the end of each epoch
        learning_rate=2e-4,
        per_device_train_batch_size=5,
        gradient_accumulation_steps=4,
        num_train_epochs=12,
        weight_decay=0.01,
        save_total_limit=1,
        logging_dir="training_logs",
        logging_steps=50,
        fp16=False,
        bf16=True,
        load_best_model_at_end=False,  # Do not load the best model
        greater_is_better=False,
        report_to="none"  # Disable reporting to external services
    )
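    # With per_device_train_batch_size=5 and gradient_accumulation_steps=4, each
    # optimizer step sees an effective batch of 20 sequences.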
    logging.info("Training arguments configured successfully")
except Exception as e:
    logging.error(f"Failed to configure training arguments: {e}")
    raise

# Initialize the Trainer
try:
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=data_collator
    )
    logging.info("Trainer initialized successfully")
except Exception as e:
    logging.error(f"Failed to initialize the Trainer: {e}")
    raise

# Run training under the ZeroGPU decorator. The GPU is granted only for the
# duration requested below, so the trainer state is saved before the allocation
# expires. Note that trainer.train() blocks until all epochs finish (or an error
# is raised); re-invoking it in a timed loop would restart training from scratch
# rather than resume it, so it is called exactly once.
@spaces.GPU(duration=120)
def segmented_train(trainer):
    start_time = time.time()
    try:
        trainer.train()
    except torch.cuda.OutOfMemoryError as e:
        logging.error(f"Out of memory error: {e}")
    except Exception as e:
        logging.error(f"Training error: {e}")
    trainer.save_state()
    logging.info(f"Training segment finished after {time.time() - start_time:.1f}s")

try:
    segmented_train(trainer)
    logging.info("Model training completed successfully")
except Exception as e:
    logging.error(f"Training failed: {e}")
    import traceback
    traceback.print_exc()

# Save the Model
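# Note: with a PEFT-wrapped model, save_pretrained writes only the LoRA adapter
# weights and adapter_config.json, not the merged 8B model; reload by attaching
# the adapter with PeftModel.from_pretrained on top of the quantized base.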
try:
    model.save_pretrained("llama3-8b-chat-finetuned-final-version")
    tokenizer.save_pretrained("llama3-8b-chat-finetuned-final-version")
    logging.info("Final fine-tuned model and tokenizer saved successfully")
except Exception as e:
    logging.error(f"Failed to save the final fine-tuned model: {e}")

# Inference Function
@spaces.GPU
def generate_response(prompt, model, tokenizer, max_length=128, min_length=20, temperature=0.7, top_k=50, top_p=0.9):
    try:
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_length=max_length,
                min_length=min_length,
                do_sample=True,
                temperature=temperature,
                top_k=top_k,
                top_p=top_p,
                repetition_penalty=1.3,
                no_repeat_ngram_size=3,
                eos_token_id=tokenizer.eos_token_id
            )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response
    except Exception as e:
        logging.error(f"Failed to generate response: {e}")
        return ""

# Example Usage
prompt = "bro did u talk with DK today"
response = generate_response(prompt, model, tokenizer)
print(response)
logging.info(f"Generated response: {response}")