m7mdal7aj committed on
Commit
17c1e65
1 Parent(s): 2770d90

Upload 11 files

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ my_model/fine_tuner/fine_tuning_data/fine_tuning_data_detic.csv filter=lfs diff=lfs merge=lfs -text
37
+ my_model/fine_tuner/fine_tuning_data/fine_tuning_data_yolov5.csv filter=lfs diff=lfs merge=lfs -text
my_model/LLAMA2/LLAMA2_config.py ADDED
@@ -0,0 +1,15 @@
1
+ # Configuration parameters for LLaMA-2 model
2
+ import torch
3
+ import os
4
+
5
+ MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
6
+ TOKENIZER_NAME = "meta-llama/Llama-2-7b-chat-hf"
7
+ QUANTIZATION = '4bit' # Options: '4bit', '8bit', or None
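+ # Note: 4-bit / 8-bit loading relies on bitsandbytes and a CUDA GPU; set QUANTIZATION = None to skip bitsandbytes quantization.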
8
+ FROM_SAVED = False
9
+ MODEL_PATH = None
10
+ TRUST_REMOTE = False
11
+ USE_FAST = True
12
+ ADD_EOS_TOKEN = True
13
+ # ACCESS_TOKEN = "xx"  # A HF read-only token can be hard-coded here if needed.
+ ACCESS_TOKEN = os.getenv('HUGGINGFACE_TOKEN')  # read from a secret on the HF Space; referenced as config.ACCESS_TOKEN by Llama2ModelManager
15
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
my_model/LLAMA2/LLAMA2_model.py ADDED
@@ -0,0 +1,173 @@
1
+ import torch
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
+ from typing import Optional
4
+ import bitsandbytes  # needed only for quantized (4/8-bit) loading on GPU
+ import accelerate  # needed for device_map="auto" model loading
6
+ from my_model.LLAMA2 import LLAMA2_config as config # Importing LLAMA2 configuration file
7
+ import warnings
8
+
9
+ # Suppress only FutureWarning from transformers
10
+ warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")
11
+
12
+
13
+ class Llama2ModelManager:
14
+ """
15
+ Manages loading and configuring the LLaMA-2 model and tokenizer.
16
+
17
+ Attributes:
18
+ device (str): Device to use for the model ('cuda' or 'cpu').
19
+ model_name (str): Name or path of the pre-trained model.
20
+ tokenizer_name (str): Name or path of the tokenizer.
21
+ quantization (str): Specifies the quantization level ('4bit', '8bit', or None).
22
+ from_saved (bool): Flag to load the model from a saved path.
23
+ model_path (str or None): Path to the saved model if `from_saved` is True.
24
+ trust_remote (bool): Whether to trust remote code when loading the tokenizer.
25
+ use_fast (bool): Whether to use the fast version of the tokenizer.
26
+ add_eos_token (bool): Whether to add an EOS token to the tokenizer.
27
+ access_token (str): Access token for Hugging Face Hub.
28
+ model (AutoModelForCausalLM or None): Loaded model, initially None.
29
+ """
30
+
31
+ def __init__(self) -> None:
32
+ """
33
+ Initializes the Llama2ModelManager class with configuration settings.
34
+ """
35
+ self.device: str = config.DEVICE
36
+ self.model_name: str = config.MODEL_NAME
37
+ self.tokenizer_name: str = config.TOKENIZER_NAME
38
+ self.quantization: str = config.QUANTIZATION
39
+ self.from_saved: bool = config.FROM_SAVED
40
+ self.model_path: Optional[str] = config.MODEL_PATH
41
+ self.trust_remote: bool = config.TRUST_REMOTE
42
+ self.use_fast: bool = config.USE_FAST
43
+ self.add_eos_token: bool = config.ADD_EOS_TOKEN
44
+ self.access_token: str = config.ACCESS_TOKEN
45
+ self.model: Optional[AutoModelForCausalLM] = None
+ self.tokenizer: Optional[AutoTokenizer] = None
46
+
47
+ def create_bnb_config(self) -> BitsAndBytesConfig:
48
+ """
49
+ Creates a BitsAndBytes configuration based on the quantization setting.
50
+
51
+ Returns:
52
+ BitsAndBytesConfig: Configuration for BitsAndBytes optimized model.
53
+ """
54
+ if self.quantization == '4bit':
55
+ return BitsAndBytesConfig(
56
+ load_in_4bit=True,
57
+ bnb_4bit_use_double_quant=True,
58
+ bnb_4bit_quant_type="nf4",
59
+ bnb_4bit_compute_dtype=torch.bfloat16
60
+ )
61
+ elif self.quantization == '8bit':
+ return BitsAndBytesConfig(load_in_8bit=True)  # 8-bit loading has no double-quant / quant-type / compute-dtype options (those are 4-bit only)
68
+
69
+ def load_model(self) -> AutoModelForCausalLM:
70
+ """
71
+ Loads the LLaMA-2 model based on the specified configuration. If the model is already loaded, returns the existing model.
72
+
73
+ Returns:
74
+ AutoModelForCausalLM: Loaded LLaMA-2 model.
75
+ """
76
+ if self.model is not None:
77
+ print("Model is already loaded.")
78
+ return self.model
79
+
80
+ if self.from_saved:
81
+ self.model = AutoModelForCausalLM.from_pretrained(self.model_path, device_map="auto")
82
+ else:
83
+ bnb_config = None if self.quantization is None else self.create_bnb_config()
84
+ self.model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto",
85
+ quantization_config=bnb_config,
86
+ torch_dtype=torch.float16,
87
+ token=self.access_token)
88
+
89
+ if self.model is not None:
90
+ print(f"LLAMA2 Model loaded successfully in {self.quantization} quantization.")
91
+ else:
92
+ print("LLAMA2 Model failed to load.")
93
+ return self.model
94
+
95
+ def load_tokenizer(self) -> AutoTokenizer:
96
+ """
97
+ Loads the tokenizer for the LLaMA-2 model with the specified configuration.
98
+
99
+ Returns:
100
+ AutoTokenizer: Loaded tokenizer for LLaMA-2 model.
101
+ """
102
+ self.tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=self.use_fast,
103
+ token=self.access_token,
104
+ trust_remote_code=self.trust_remote,
105
+ add_eos_token=self.add_eos_token)
106
+
107
+ if self.tokenizer is not None:
108
+ print(f"LLAMA2 Tokenizer loaded successfully.")
109
+ else:
110
+ print("LLAMA2 Tokenizer failed to load.")
111
+
112
+ return self.tokenizer
113
+
114
+ def load_model_and_tokenizer(self, for_fine_tuning):
115
+ """
116
+ Loads LLAMa2 model and tokenizer in one method and adds special tokens if the purpose if fine tuning.
117
+ :param for_fine_tuning: YES(True) / NO (False)
118
+ :return: LLAMA2 Model and Tokenizer
119
+ """
120
+ if for_fine_tuning:
121
+ self.tokenizer = self.load_tokenizer()
122
+ self.model = self.load_model()
123
+ self.add_special_tokens()
124
+ else:
125
+ self.tokenizer = self.load_tokenizer()
126
+ self.model = self.load_model()
127
+
128
+ return self.model, self.tokenizer
129
+
130
+
131
+ def add_special_tokens(self, tokens: Optional[list[str]] = None) -> None:
132
+ """
133
+ Adds special tokens to the tokenizer and updates the model's token embeddings if the model is loaded,
134
+ only if the tokenizer is loaded.
135
+
136
+ Args:
137
+ tokens (list of str, optional): Special tokens to add. Defaults to a predefined set.
138
+
139
+ Returns:
140
+ None
141
+ """
142
+ if self.tokenizer is None:
143
+ print("Tokenizer is not loaded. Cannot add special tokens.")
144
+ return
145
+
146
+ if tokens is None:
147
+ tokens = ['[CAP]', '[/CAP]', '[QES]', '[/QES]', '[OBJ]', '[/OBJ]']
148
+
149
+ # Update the tokenizer with new tokens
150
+ print(f"Original vocabulary size: {len(self.tokenizer)}")
151
+ print(f"Adding the following tokens: {tokens}")
152
+ self.tokenizer.add_tokens(tokens, special_tokens=True)
153
+ self.tokenizer.add_special_tokens({'pad_token': '<pad>'})
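+ # LLaMA-2's tokenizer ships without a pad token, so an explicit '<pad>' token is added above to enable padded/batched training.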
154
+ print(f"Adding Padding Token {self.tokenizer.pad_token}")
155
+ self.tokenizer.padding_side = "right"
156
+ print(f'Padding side: {self.tokenizer.padding_side}')
157
+
158
+ # Resize the model token embeddings if the model is loaded
159
+ if self.model is not None:
160
+ self.model.resize_token_embeddings(len(self.tokenizer))
161
+ self.model.config.pad_token_id = self.tokenizer.pad_token_id
162
+
163
+ print(f'Updated Vocabulary Size: {len(self.tokenizer)}')
164
+ print(f'Padding Token: {self.tokenizer.pad_token}')
165
+ print(f'Special Tokens: {self.tokenizer.added_tokens_decoder}')
166
+
167
+
168
+ if __name__ == "__main__":
169
+ pass
170
+ LLAMA2_manager = Llama2ModelManager()
171
+ LLAMA2_model = LLAMA2_manager.load_model() # First time loading the model
172
+ LLAMA2_tokenizer = LLAMA2_manager.load_tokenizer()
173
+ LLAMA2_manager.add_special_tokens()  # the manager already holds the loaded model and tokenizer
my_model/extract_objects.py ADDED
@@ -0,0 +1,45 @@
1
+ from object_detection import ObjectDetector
2
+ import os
3
+
4
+ def detect_objects_for_image(image_name, image_directory, detector):
+ """
+ Detects objects for a single image file and returns their string representation.
+ Returns "Image not found" if the file does not exist.
+ """
+ image_path = os.path.join(image_directory, image_name)
+ if os.path.exists(image_path):
+ image = detector.process_image(image_path)
+ detected_objects_str, _ = detector.detect_objects(image)
+ return detected_objects_str
+ else:
+ return "Image not found"
+
+ def add_detected_objects_to_dataframe(df, image_directory, detector):
+ """
+ Adds a column to the DataFrame with detected objects for each image specified in the 'image_name' column.
+
+ Parameters:
+ df (pd.DataFrame): DataFrame containing a column 'image_name' with image filenames.
+ image_directory (str): Path to the directory containing images.
+ detector (ObjectDetector): An instance of the ObjectDetector class.
+
+ Returns:
+ pd.DataFrame: The original DataFrame with an additional column 'detected_objects'.
+ """
+
+ # Ensure 'image_name' column exists in the DataFrame
+ if 'image_name' not in df.columns:
+ raise ValueError("DataFrame must contain an 'image_name' column.")
+
+ # Apply the detection helper to each row, passing the image directory and detector along
+ df['detected_objects'] = df['image_name'].apply(
+ lambda image_name: detect_objects_for_image(image_name, image_directory, detector))
+
+ return df
39
+
40
+ # Example usage (assuming the function will be used in a context where 'detector' is defined and configured):
41
+ # df_images = pd.DataFrame({"image_name": ["image1.jpg", "image2.jpg", ...]})
42
+ # image_directory = "path/to/image_directory"
43
+ # updated_df = add_detected_objects_to_dataframe(df_images, image_directory, detector)
44
+ # updated_df.head()
45
+
my_model/fine_tuner/fine_tuner.py ADDED
@@ -0,0 +1,347 @@
1
+
2
+ # Main Fine-Tuning Script for meta-llama/Llama-2-7b-chat-hf
3
+
4
+ # This script is the central executable for fine-tuning large language models, specifically designed for the LLAMA2
5
+ # model.
6
+ # It encompasses the entire process of fine-tuning, starting from data preparation to the final model training.
7
+ # The script leverages the 'FinetuningDataHandler' class for data loading, inspection, preparation, and splitting.
8
+ # This ensures that the dataset is correctly processed and prepared for effective training.
9
+
10
+ # The fine-tuning process is managed by the Finetuner class, which handles the training of the model using specific
11
+ # training arguments and datasets. Advanced configurations for Quantized Low-Rank Adaptation (QLoRA) and Parameter
12
+ # Efficient Fine-Tuning (PEFT) are utilized to optimize the training process on limited hardware resources.
13
+
14
+ # The script is designed to be executed as a standalone process, providing an end-to-end solution for fine-tuning
15
+ # LLMs. It is a part of a larger project aimed at optimizing the performance of language model to adapt to
16
+ # OK-VQA dataset.
17
+
18
+ # Ensure all dependencies are installed and the required files are in place before running this script.
19
+ # The configurations for the fine-tuning process are defined in the 'fine_tuning_config.py' file.
20
+
21
+ # ---------- Please run this file for the full fine-tuning process to start ----------#
22
+ # ---------- Please ensure this is run on a GPU ----------#
23
+
24
+
25
+ import torch
26
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, TRANSFORMERS_CACHE
27
+ from trl import SFTTrainer
28
+ from datasets import Dataset, load_dataset
29
+ from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
30
+ import fine_tuning_config as config
31
+ from typing import List
32
+ import bitsandbytes # only on GPU
33
+ import gc
34
+ import os
35
+ import shutil
36
+ from my_model.LLAMA2.LLAMA2_model import Llama2ModelManager
37
+ from fine_tuning_data_handler import FinetuningDataHandler
38
+
39
+
40
+ class QLoraConfig:
41
+ """
42
+ Configures QLoRA (Quantized Low-Rank Adaptation) parameters for efficient model fine-tuning.
43
+ LoRA allows adapting large language models with a minimal number of trainable parameters.
44
+
45
+ Attributes:
46
+ lora_config (LoraConfig): Configuration object for LoRA parameters.
47
+ """
48
+
49
+ def __init__(self) -> None:
50
+ """
51
+ Initializes QLoraConfig with specific LoRA parameters.
52
+
53
+ """
54
+ # please refer to config file 'fine_tuning_config.py' for QLORA arguments description.
55
+ self.lora_config = LoraConfig(
56
+ lora_alpha=config.LORA_ALPHA,
57
+ lora_dropout=config.LORA_DROPOUT,
58
+ r=config.LORA_R,
59
+ bias="none", # bias is already accounted for in LLAMA2 pre-trained model layers.
60
+ task_type="CAUSAL_LM",
61
+ target_modules=['up_proj', 'down_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj'] # modules for fine-tuning.
62
+ )
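+ # Rough size check: each targeted d_in x d_out linear layer gains r * (d_in + d_out) trainable
+ # parameters (A: r x d_in, B: d_out x r), which is why the adapter stays tiny relative to the 7B base model.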
63
+
64
+
65
+ class Finetuner:
66
+ """
67
+ The Finetuner class manages the fine-tuning process of a pre-trained language model using specific
68
+ training arguments and datasets. It is designed to adapt a pre-trained model on a specific dataset
69
+ to enhance its performance on similar data.
70
+
71
+ This class not only facilitates the fine-tuning of LLAMA2 but also includes advanced
72
+ resource management capabilities. It provides methods for deleting model and trainer objects,
73
+ clearing GPU memory, and cleaning up Hugging Face's Transformers cache. These functionalities
74
+ make the Finetuner class especially useful in environments with limited computational resources
75
+ or when managing multiple models or training sessions.
76
+
77
+ Additionally, the class supports configurations for Quantized Low-Rank Adaptation (QLoRA)
78
+ to fine-tune models with minimal trainable parameters, and Parameter Efficient Fine-Tuning (PEFT)
79
+ for training efficiency on limited hardware.
80
+
81
+ Attributes:
82
+ base_model (AutoModelForCausalLM): The pre-trained language model to be fine-tuned.
83
+ tokenizer (AutoTokenizer): The tokenizer associated with the model.
84
+ train_dataset (Dataset): The dataset used for training.
85
+ eval_dataset (Dataset): The dataset used for evaluation.
86
+ training_arguments (TrainingArguments): Configuration for training the model.
87
+
88
+ Key Methods:
89
+ - load_LLAMA2_for_finetuning: Loads the LLAMA2 model and tokenizer for fine-tuning.
90
+ - train: Trains the model using PEFT configuration.
91
+ - delete_model: Deletes a specified model attribute.
92
+ - delete_trainer: Deletes a specified trainer object.
93
+ - clear_training_resources: Clears GPU memory.
94
+ - clear_cache_and_collect_garbage: Clears Transformers cache and performs garbage collection.
95
+ - find_all_linear_names: Identifies linear layer names suitable for LoRA application.
96
+ - print_trainable_parameters: Prints the number of trainable parameters in the model.
97
+ """
98
+
99
+ def __init__(self, train_dataset: Dataset, eval_dataset: Dataset) -> None:
100
+ """
101
+ Initializes the Finetuner class with the model, tokenizer, and datasets.
102
+
103
+ Args:
104
+ model (AutoModelForCausalLM): The pre-trained language model.
105
+ tokenizer (AutoTokenizer): The tokenizer for the model.
106
+ train_dataset (Dataset): The dataset for training the model.
107
+ eval_dataset (Dataset): The dataset for evaluating the model.
108
+ """
109
+
110
+ self.base_model, self.tokenizer = self.load_LLAMA2_for_finetuning()
111
+ self.merged_model = None
112
+ self.train_dataset = train_dataset
113
+ self.eval_dataset = eval_dataset
114
+ # please refer to config file 'fine_tuning_config.py' for training arguments description.
115
+ self.training_arguments = TrainingArguments(
116
+ output_dir=config.OUTPUT_DIR,
117
+ num_train_epochs=config.NUM_TRAIN_EPOCHS,
118
+ per_device_train_batch_size=config.PER_DEVICE_TRAIN_BATCH_SIZE,
119
+ per_device_eval_batch_size=config.PER_DEVICE_EVAL_BATCH_SIZE,
120
+ gradient_accumulation_steps=config.GRADIENT_ACCUMULATION_STEPS,
121
+ fp16=config.FP16,
122
+ bf16=config.BF16,
123
+ evaluation_strategy=config.Evaluation_STRATEGY,
124
+ eval_steps=config.EVALUATION_STEPS,
125
+ max_grad_norm=config.MAX_GRAD_NORM,
126
+ learning_rate=config.LEARNING_RATE,
127
+ weight_decay=config.WEIGHT_DECAY,
128
+ optim=config.OPTIM,
129
+ lr_scheduler_type=config.LR_SCHEDULER_TYPE,
130
+ max_steps=config.MAX_STEPS,
131
+ warmup_ratio=config.WARMUP_RATIO,
132
+ group_by_length=config.GROUP_BY_LENGTH,
133
+ save_steps=config.SAVE_STEPS,
134
+ logging_steps=config.LOGGING_STEPS,
135
+ report_to="tensorboard"
136
+ )
137
+
138
+ def load_LLAMA2_for_finetuning(self):
139
+ """
140
+ Loads the LLAMA2 model and tokenizer, specifically configured for fine-tuning.
141
+ This method ensures the model is ready to be adapted to a specific task or dataset.
142
+
143
+ Returns:
144
+ Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer.
145
+ """
146
+
147
+ llm_manager = Llama2ModelManager()
148
+ base_model, tokenizer = llm_manager.load_model_and_tokenizer(for_fine_tuning=True)
149
+
150
+ return base_model, tokenizer
151
+
152
+ def find_all_linear_names(self) -> List[str]:
153
+ """
154
+ Identifies all linear layer names in the model that are suitable for applying LoRA.
155
+
156
+ Returns:
157
+ List[str]: A list of linear layer names.
158
+ """
159
+ cls = bitsandbytes.nn.Linear4bit
160
+ lora_module_names = set()
161
+ for name, module in self.base_model.named_modules():
162
+ if isinstance(module, cls):
163
+ names = name.split('.')
164
+ lora_module_names.add(names[0] if len(names) == 1 else names[-1])
165
+
166
+ # We don't want to train these two modules, to avoid computational overhead.
167
+ lora_module_names -= {'lm_head', 'gate_proj'}
168
+ return list(lora_module_names)
169
+
170
+ def print_trainable_parameters(self, use_4bit: bool = False) -> None:
171
+ """
172
+ Calculates and prints the number of trainable parameters in the model.
173
+
174
+ Args:
175
+ use_4bit (bool): If true, calculates the parameter count considering 4-bit quantization.
176
+ """
177
+ trainable_params = sum(p.numel() for p in self.base_model.parameters() if p.requires_grad)
178
+ if use_4bit:
179
+ trainable_params //= 2  # integer division keeps the ',d' format below valid
180
+
181
+ total_params = sum(p.numel() for p in self.base_model.parameters())
182
+ print(f"All Parameters: {total_params:,d} || Trainable Parameters: {trainable_params:,d} "
183
+ f"|| Trainable Parameters %: {100 * trainable_params / total_params:.2f}%")
184
+
185
+ def train(self, peft_config: LoraConfig) -> None:
186
+ """
187
+ Trains the model using the specified PEFT (Parameter-Efficient Fine-Tuning) configuration.
188
+
189
+ Args:
190
+ peft_config (LoraConfig): Configuration for the PEFT training process.
191
+ """
192
+ self.base_model.config.use_cache = False
193
+ # Set the pretraining_tp flag to 1 to enable the use of LoRA (Low-Rank Adapters) layers.
194
+ self.base_model.config.pretraining_tp = 1
195
+ # Prepare the model for k-bit training by quantizing the weights to 4 bits using bitsandbytes.
196
+ self.base_model = prepare_model_for_kbit_training(self.base_model)
197
+ self.trainer = SFTTrainer(
198
+ model=self.base_model,
199
+ train_dataset=self.train_dataset,
200
+ eval_dataset=self.eval_dataset,
201
+ peft_config=peft_config,
202
+ dataset_text_field='text',
203
+ max_seq_length=config.MAX_TOKEN_COUNT,
204
+ tokenizer=self.tokenizer,
205
+ args=self.training_arguments,
206
+ packing=config.PACKING
207
+ )
208
+ self.trainer.train()
209
+
210
+ def save_model(self):
211
+
212
+ """
213
+ Saves the fine-tuned model to the specified directory.
214
+
215
+ This method saves the model weights and configuration of the fine-tuned model.
216
+ The save directory and filename are determined by the configuration provided in
217
+ the 'fine_tuning_config.py' file. It is useful for persisting the fine-tuned model
218
+ for later use or evaluation.
219
+
220
+ The saved model can be easily loaded using Hugging Face's model loading utilities.
221
+ """
222
+
223
+ self.fine_tuned_adapter_name = config.ADAPTER_SAVE_NAME
224
+ self.trainer.model.save_pretrained(self.fine_tuned_adapter_name)
225
+
226
+ def merge_weights(self):
227
+ """
228
+ Merges the weights of the fine-tuned adapter with the base model.
229
+
230
+ This method integrates the fine-tuned adapter weights into the base model,
231
+ resulting in a single consolidated model. The merged model can then be used
232
+ for inference or further training.
233
+
234
+ After merging, the weights of the adapter are no longer separate from the
235
+ base model, enabling more efficient storage and deployment. The merged model
236
+ is stored in the 'self.merged_model' attribute of the Finetuner class.
237
+ """
238
+
239
+ self.merged_model = PeftModel.from_pretrained(self.base_model, self.fine_tuned_adapter_name)
240
+ self.merged_model = self.merged_model.merge_and_unload()
241
+
242
+ def delete_model(self, model_name: str):
243
+ """
244
+ Deletes a specified model attribute.
245
+
246
+ Args:
247
+ model_name (str): The name of the model attribute to delete.
248
+ """
249
+ try:
250
+ if hasattr(self, model_name) and getattr(self, model_name) is not None:
251
+ delattr(self, model_name)
252
+ print(f"Model '{model_name}' has been deleted.")
253
+ else:
254
+ print(f"Warning: Model '{model_name}' has already been cleared or does not exist.")
255
+ except Exception as e:
256
+ print(f"Error occurred while deleting model '{model_name}': {str(e)}")
257
+
258
+ def delete_trainer(self, trainer_name: str):
259
+ """
260
+ Deletes a specified trainer object.
261
+
262
+ Args:
263
+ trainer_name (str): The name of the trainer object to delete.
264
+ """
265
+ try:
266
+ if hasattr(self, trainer_name) and getattr(self, trainer_name) is not None:
267
+ delattr(self, trainer_name)
268
+ print(f"Trainer object '{trainer_name}' has been deleted.")
269
+ else:
270
+ print(f"Warning: Trainer object '{trainer_name}' has already been cleared or does not exist.")
271
+ except Exception as e:
272
+ print(f"Error occurred while deleting trainer object '{trainer_name}': {str(e)}")
273
+
274
+ def clear_training_resources(self):
275
+ """
276
+ Clears GPU memory.
277
+ """
278
+ try:
279
+ if torch.cuda.is_available():
280
+ torch.cuda.empty_cache()
281
+ print("GPU memory has been cleared.")
282
+ except Exception as e:
283
+ print(f"Error occurred while clearing GPU memory: {str(e)}")
284
+
285
+ def clear_cache_and_collect_garbage(self):
286
+ """
287
+ Clears Hugging Face's Transformers cache and runs garbage collection.
288
+ """
289
+ try:
290
+ if os.path.exists(TRANSFORMERS_CACHE):
291
+ shutil.rmtree(TRANSFORMERS_CACHE, ignore_errors=True)
292
+ print("Transformers cache has been cleared.")
293
+
294
+ gc.collect()
295
+ print("Garbage collection has been executed.")
296
+ except Exception as e:
297
+ print(f"Error occurred while clearing cache and collecting garbage: {str(e)}")
298
+
299
+ def fine_tune(save_fine_tuned_adapter=False, merge=False, delete_trainer_after_fine_tune=False):
300
+ """
301
+ Conducts the fine-tuning process of a pre-trained language model using specified configurations.
302
+ This function encompasses the complete workflow of fine-tuning, including data handling, training,
303
+ and optional steps like saving the fine-tuned model and merging weights.
304
+
305
+ Args:
306
+ save_fine_tuned_adapter (bool): If True, saves the fine-tuned adapter after training.
307
+ merge (bool): If True, merges the weights of the fine-tuned adapter into the base model.
308
+ delete_trainer_after_fine_tune (bool): If True, deletes the trainer object after fine-tuning to free up resources.
309
+
310
+ Returns:
311
+ The fine-tuned model after the fine-tuning process. This could be either the merged model
312
+ or the trained model based on the provided arguments.
313
+
314
+ The function begins by preparing the training and evaluation datasets using the `FinetuningDataHandler`.
315
+ It then sets up the QLoRA configuration for the fine-tuning process. The actual training is carried out by
316
+ the `Finetuner` class. Post training, based on the arguments, the function can save the fine-tuned model,
317
+ merge the adapter weights with the base model, and clean up resources by deleting the trainer object.
318
+ """
319
+
320
+ data_handler = FinetuningDataHandler()
321
+ fine_tuning_data_train, fine_tuning_data_eval = data_handler.inspect_prepare_split_data()
322
+ qlora = QLoraConfig()
323
+ peft_config = qlora.lora_config
324
+ tuner = Finetuner(fine_tuning_data_train, fine_tuning_data_eval)
325
+ tuner.train(peft_config=peft_config)
326
+ if save_fine_tuned_adapter or merge:
+ tuner.save_model()  # merging requires the fine-tuned adapter to be saved first
+
+ if merge:
+ tuner.merge_weights()
+
+ if delete_trainer_after_fine_tune:
+ tuner.delete_trainer("trainer")
+
+ tuner.delete_model("base_model")  # We always delete this as it is not required after the merge.
339
+ if tuner.merged_model is not None:
340
+ return tuner.merged_model
341
+ else:
342
+ return tuner.trainer.model
343
+
344
+
345
+
346
+ if __name__ == "__main__":
347
+ fine_tune()
my_model/fine_tuner/fine_tuning_config.py ADDED
@@ -0,0 +1,114 @@
1
+ # Configurable parameters for fine-tuning
2
+
3
+ import os
4
+
5
+
6
+ # *** Dataset ***
7
+ # Base directory where the script is running
8
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
9
+ # Path to the folder containing the data files, relative to the configuration file
10
+ DATA_FOLDER = 'fine_tuning_data'
11
+ # Full path to the data folder
12
+ DATA_FOLDER_PATH = os.path.join(BASE_DIR, DATA_FOLDER)
13
+ # Path to the dataset file (CSV format)
14
+ DATASET_FILE = os.path.join(DATA_FOLDER_PATH, 'fine_tuning_data_yolov5.csv') # or 'fine_tuning_data_detic.csv'
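+ # The CSV is expected to expose a 'text' column holding the full prompt per sample; both the data handler
+ # (token counting) and SFTTrainer (dataset_text_field='text') read from that column.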
15
+
16
+
17
+ # *** Fine-tuned Adapter ***
18
+ TRAINED_ADAPTER_NAME = 'fine_tuned_adapter' # name of fine-tuned adapter.
19
+ FINE_TUNED_ADAPTER_FOLDER = 'fine_tuned_model'
20
+ FINE_TUNED_ADAPTER_PATH = os.path.join(BASE_DIR, FINE_TUNED_ADAPTER_FOLDER)
21
+ ADAPTER_SAVE_NAME = os.path.join(FINE_TUNED_ADAPTER_PATH, TRAINED_ADAPTER_NAME)
22
+
23
+
24
+ # Proportion of the dataset to include in the test split (e.g., 0.1 for 10%)
25
+ TEST_SIZE = 0.1
26
+
27
+ # Seed for random operations to ensure reproducibility
28
+ SEED = 123
29
+
30
+ # *** QLoRA Configuration Parameters ***
31
+ # LoRA attention dimension: the rank 'r' of the low-rank update matrices
32
+ LORA_R = 64
33
+
34
+ # Alpha parameter for LoRA scaling: controls the scaling of LoRA weights
35
+ LORA_ALPHA = 32
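+ # With LORA_R = 64, the effective LoRA scaling applied to the adapter output is LORA_ALPHA / LORA_R = 0.5.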
36
+
37
+ # Dropout probability for LoRA layers: probability of dropping a unit in LoRA layers
38
+ LORA_DROPOUT = 0.05
39
+
40
+
41
+
42
+ # *** TrainingArguments Configuration Parameters for the Transformers library ***
43
+ # Output directory to save model predictions and checkpoints
44
+ OUTPUT_DIR = "./TUNED_MODEL_LLAMA"
45
+
46
+ # Number of epochs to train the model
47
+ NUM_TRAIN_EPOCHS = 1
48
+
49
+ # Enable mixed-precision training using fp16 (set to True for faster training)
50
+ FP16 = True
51
+
52
+ # Enable mixed-precision training using bf16 (set to True if using an A100 GPU)
53
+ BF16 = False
54
+
55
+ # Batch size per GPU/Device for training
56
+ PER_DEVICE_TRAIN_BATCH_SIZE = 16
57
+
58
+ # Batch size per GPU/Device for evaluation
59
+ PER_DEVICE_EVAL_BATCH_SIZE = 8
60
+
61
+ # Number of update steps to accumulate gradients before performing a backward/update pass
62
+ GRADIENT_ACCUMULATION_STEPS = 1
63
+
64
+ # Enable gradient checkpointing to reduce memory usage at the cost of a slight slowdown
65
+ GRADIENT_CHECKPOINTING = True
66
+
67
+ # Maximum gradient norm for gradient clipping to prevent exploding gradients
68
+ MAX_GRAD_NORM = 0.3
69
+
70
+ # Initial learning rate for the AdamW optimizer
71
+ LEARNING_RATE = 2e-4
72
+
73
+ # Weight decay coefficient for regularization (applied to all layers except bias/LayerNorm weights)
74
+ WEIGHT_DECAY = 0.01
75
+
76
+ # Optimizer type, here using 'paged_adamw_8bit' for efficient training
77
+ OPTIM = "paged_adamw_8bit"
78
+
79
+ # Learning rate scheduler type (e.g., 'linear', 'cosine', etc.)
80
+ LR_SCHEDULER_TYPE = "linear"
81
+
82
+ # Maximum number of training steps, overrides 'num_train_epochs' if set to a positive number
83
+ # Setting MAX_STEPS = -1 in training arguments for SFTTrainer means that the number of steps will be determined by the
84
+ # number of epochs, the size of the dataset, the batch size, and the number of GPUs1. This is the default behavior
85
+ # when MAX_STEPS is not specified or set to a negative value2.
86
+ MAX_STEPS = -1
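+ # Worked example (illustrative numbers only): with ~9,000 training samples, PER_DEVICE_TRAIN_BATCH_SIZE = 16,
+ # GRADIENT_ACCUMULATION_STEPS = 1 and a single GPU, one epoch corresponds to ceil(9000 / 16) = 563 optimizer steps.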
87
+
88
+ # Ratio of the total number of training steps used for linear warmup
89
+ WARMUP_RATIO = 0.03
90
+
91
+ # Whether to group sequences into batches with the same length to save memory and increase speed
92
+ GROUP_BY_LENGTH = False
93
+
94
+ # Save a model checkpoint every X update steps
95
+ SAVE_STEPS = 50
96
+
97
+ # Log training information every X update steps
98
+ LOGGING_STEPS = 25
99
+
100
+ PACKING = False
101
+
102
+ # Evaluation strategy during training ("steps", "epoch", "no")
103
+ Evaluation_STRATEGY = "steps"
104
+
105
+ # Number of update steps between two evaluations if `evaluation_strategy="steps"`.
106
+ # Will default to the same value as `logging_steps` if not set.
107
+ EVALUATION_STEPS = 5
108
+
109
+ # Maximum number of tokens per sample in the dataset
110
+ MAX_TOKEN_COUNT = 1024
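+ # This value is used twice: the data handler drops samples whose tokenized length exceeds it, and the
+ # SFTTrainer receives it as max_seq_length.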
111
+
112
+
113
+ if __name__=="__main__":
114
+ pass
my_model/fine_tuner/fine_tuning_data/fine_tuning_data_detic.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77089f24dd5414b0d1dcb5b8f3b34aac3daea86e68c1c70e2da6490482ac9d4b
3
+ size 54670629
my_model/fine_tuner/fine_tuning_data/fine_tuning_data_yolov5.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a44d22827c212a9d7a30bb3fd94cb7d7ad82a968a55eaa09e0ff5a61f85fde05
3
+ size 14547559
my_model/fine_tuner/fine_tuning_data/read_me.txt ADDED
@@ -0,0 +1,8 @@
1
+ The data files 'fine_tuning_data_detic.csv' and 'fine_tuning_data_yolov5.csv' are the result of preparing and
+ filtering the data through the following steps:
3
+
4
+ - Generate the captions for all the images.
5
+ - Delete all samples with corrupted or rubbish data. (Please refer to the report for details)
6
+ - Run the object detection models ('yolov5' and 'detic') and generate the detected objects for the images of the remaining samples.
7
+ - Combine the question, answer, caption, and detected objects together with the system prompt into the desired template for all
+ the samples (a brief illustrative sketch follows below; please refer to the report for the detailed template design).
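
For illustration, a minimal sketch (not one of the uploaded files) of how one prepared sample might be assembled into the fine-tuning 'text' field, assuming the template simply wraps the caption, detected objects, and question in the special tokens added in LLAMA2_model.py ('[CAP]', '[OBJ]', '[QES]'); the actual system prompt and template are the ones described in the report:

def build_training_text(system_prompt, caption, objects_str, question, answer):
    # Hypothetical assembly of one fine-tuning row into a single training string.
    return (f"{system_prompt}\n"
            f"[CAP]{caption}[/CAP]\n"
            f"[OBJ]{objects_str}[/OBJ]\n"
            f"[QES]{question}[/QES]\n"
            f"{answer}")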
my_model/fine_tuner/fine_tuning_data_handler.py ADDED
@@ -0,0 +1,182 @@
1
+ from my_model.utilities import is_pycharm
2
+ import seaborn as sns
3
+ from transformers import AutoTokenizer
4
+ from datasets import Dataset, load_dataset
5
+ import fine_tuning_config as config
6
+ from my_model.LLAMA2.LLAMA2_model import Llama2ModelManager
7
+ from typing import Tuple
8
+
9
+
10
+
11
+ class FinetuningDataHandler:
12
+ """
13
+ A class dedicated to handling data for fine-tuning language models. It manages loading,
14
+ inspecting, preparing, and splitting the dataset, specifically designed to filter out
15
+ data samples exceeding a specified token count limit. This is crucial for models with
16
+ token count constraints and it helps control the level of GPU RAM tolernace based on the number of tokens,
17
+ ensuring efficient and effective model fine-tuning.
18
+
19
+ Attributes:
20
+ tokenizer (AutoTokenizer): Tokenizer used for tokenizing the dataset.
21
+ dataset_file (str): File path to the dataset.
22
+ max_token_count (int): Maximum allowable token count per data sample.
23
+
24
+ Methods:
25
+ load_llm_tokenizer(): Loads the LLM tokenizer and adds special tokens, if not already loaded.
26
+ load_dataset(): Loads the dataset from a specified file path.
27
+ plot_tokens_count_distribution(token_counts, title): Plots the distribution of token counts in the dataset.
28
+ filter_dataset_by_indices(dataset, valid_indices): Filters the dataset based on valid indices, removing samples exceeding token limits.
29
+ get_token_counts(dataset): Calculates token counts for each sample in the dataset.
30
+ prepare_dataset(): Tokenizes and filters the dataset, preparing it for training. Also visualizes token count distribution before and after filtering.
31
+ split_dataset_for_train_eval(dataset): Divides the dataset into training and evaluation sets.
32
+ inspect_prepare_split_data(): Coordinates the data preparation and splitting process for fine-tuning.
33
+ """
34
+
35
+ def __init__(self, tokenizer: AutoTokenizer = None, dataset_file: str = config.DATASET_FILE) -> None:
36
+ """
37
+ Initializes the FinetuningDataHandler class.
38
+
39
+ Args:
40
+ tokenizer (AutoTokenizer): Tokenizer to use for tokenizing the dataset.
41
+ dataset_file (str): Path to the dataset file.
42
+ """
43
+ self.tokenizer = tokenizer # The tokenizer used for processing the dataset.
44
+ self.dataset_file = dataset_file # Path to the fine-tuning dataset file.
45
+ self.max_token_count = config.MAX_TOKEN_COUNT # Max token count for filtering.
46
+
47
+ def load_llm_tokenizer(self):
48
+ """
49
+ Loads the LLM tokenizer and adds special tokens, if not already loaded.
50
+ If the tokenizer is already loaded, this method does nothing.
51
+ """
52
+
53
+ if self.tokenizer is None:
54
+ llm_manager = Llama2ModelManager() # Initialize Llama2 model manager.
55
+ # we only need the tokenizer for the data inspection not the model itself.
56
+ self.tokenizer = llm_manager.load_tokenizer()
57
+ llm_manager.add_special_tokens() # Add special tokens specific to LLAMA2 vocab for efficient tokenization.
58
+
59
+ def load_dataset(self) -> Dataset:
60
+ """
61
+ Loads the dataset from the specified file path. The dataset is expected to be in CSV format.
62
+
63
+ Returns:
64
+ Dataset: The loaded dataset, ready for processing.
65
+ """
66
+ return load_dataset('csv', data_files=self.dataset_file)
67
+
68
+ def plot_tokens_count_distribution(self, token_counts: list, title: str = "Token Count Distribution") -> None:
69
+ """
70
+ Plots the distribution of token counts in the dataset for visualization purposes.
71
+
72
+ Args:
73
+ token_counts (list): List of token counts, each count representing the number of tokens in a dataset sample.
74
+ title (str): Title for the plot, highlighting the nature of the distribution.
75
+ """
76
+
77
+ if is_pycharm(): # Ensuring compatibility with PyCharm's environment for interactive plot.
78
+ import matplotlib
79
+ matplotlib.use('TkAgg') # Set the backend to 'TkAgg'
80
+ import matplotlib.pyplot as plt
81
+ sns.set_style("whitegrid")
82
+ plt.figure(figsize=(15, 6))
83
+ plt.hist(token_counts, bins=50, color='#3498db', edgecolor='black')
84
+ plt.title(title, fontsize=16)
85
+ plt.xlabel("Number of Tokens", fontsize=14)
86
+ plt.ylabel("Number of Samples", fontsize=14)
87
+ plt.xticks(fontsize=12)
88
+ plt.yticks(fontsize=12)
89
+ plt.tight_layout()
90
+ plt.show()
91
+
92
+ def filter_dataset_by_indices(self, dataset: Dataset, valid_indices: list) -> Dataset:
93
+ """
94
+ Filters the dataset based on a list of valid indices. This method is used to exclude
95
+ data samples that have a token count exceeding the specified maximum token count.
96
+
97
+ Args:
98
+ dataset (Dataset): The dataset to be filtered.
99
+ valid_indices (list): Indices of samples with token counts within the limit.
100
+
101
+ Returns:
102
+ Dataset: Filtered dataset containing only samples with valid indices.
103
+ """
104
+ return dataset['train'].select(valid_indices) # Select only samples with valid indices based on token count.
105
+
106
+ def get_token_counts(self, dataset):
107
+ """
108
+ Calculates and returns the token counts for each sample in the dataset.
109
+ This function assumes the dataset has a 'train' split and a 'text' field.
110
+
111
+ Args:
112
+ dataset (Dataset): The dataset for which to count tokens.
113
+
114
+ Returns:
115
+ List[int]: List of token counts per sample in the dataset.
116
+ """
117
+
118
+ if 'train' in dataset:
119
+ return [len(self.tokenizer.tokenize(s)) for s in dataset["train"]["text"]]
120
+ else:
121
+ # After filtering out samples with an unacceptable token count, the dataset is already flat
+ # (it is the result of dataset['train'].select(...)), so there is no 'train' split to index into.
123
+ return [len(self.tokenizer.tokenize(s)) for s in dataset["text"]]
124
+
125
+ def prepare_dataset(self) -> Tuple[Dataset, Dataset]:
126
+ """
127
+ Prepares the dataset for fine-tuning by tokenizing the data and filtering out samples
128
+ that exceed the maximum used context window (configurable through max_token_count).
129
+ It also visualizes the token count distribution before and after filtering.
130
+
131
+ Returns:
132
+ Tuple[Dataset, Dataset]: The train and evaluate datasets, post-filtering.
133
+ """
134
+ dataset = self.load_dataset()
135
+ self.load_llm_tokenizer()
136
+
137
+ # Count tokens in each dataset sample before filtering
138
+ token_counts_before_filtering = self.get_token_counts(dataset)
139
+ # Plot token count distribution before filtering for visualization.
140
+ self.plot_tokens_count_distribution(token_counts_before_filtering, "Token Count Distribution Before Filtration")
141
+ # Identify valid indices based on max token count.
142
+ valid_indices = [i for i, count in enumerate(token_counts_before_filtering) if count <= self.max_token_count]
143
+ # Filter the dataset to exclude samples with excessive token counts.
144
+ filtered_dataset = self.filter_dataset_by_indices(dataset, valid_indices)
145
+
146
+ token_counts_after_filtering = self.get_token_counts(filtered_dataset)
147
+ self.plot_tokens_count_distribution(token_counts_after_filtering, "Token Count Distribution After Filtration")
148
+
149
+ return self.split_dataset_for_train_eval(filtered_dataset) # split the dataset into training and evaluation.
150
+
151
+ def split_dataset_for_train_eval(self, dataset) -> Tuple[Dataset, Dataset]:
152
+ """
153
+ Splits the dataset into training and evaluation datasets.
154
+
155
+ Args:
156
+ dataset (Dataset): The dataset to split.
157
+
158
+ Returns:
159
+ tuple[Dataset, Dataset]: The split training and evaluation datasets.
160
+ """
161
+ split_data = dataset.train_test_split(test_size=config.TEST_SIZE, shuffle=True, seed=config.SEED)
162
+ train_data, eval_data = split_data['train'], split_data['test']
163
+ return train_data, eval_data
164
+
165
+ def inspect_prepare_split_data(self) -> tuple[Dataset, Dataset]:
166
+ """
167
+ Orchestrates the process of inspecting, preparing, and splitting the dataset for fine-tuning.
168
+
169
+ Returns:
170
+ tuple[Dataset, Dataset]: The prepared training and evaluation datasets.
171
+ """
172
+ return self.prepare_dataset()
173
+
174
+
175
+ # Example usage
176
+ if __name__ == "__main__":
177
+
178
+ # Please uncomment the below lines to test the data prep.
179
+ #data_handler = FinetuningDataHandler()
180
+ #fine_tuning_data_train, fine_tuning_data_eval = data_handler.inspect_prepare_split_data()
181
+ #print(fine_tuning_data_train, fine_tuning_data_eval)
182
+ pass
my_model/object_detection.py ADDED
@@ -0,0 +1,259 @@
1
+
2
+ from transformers import AutoImageProcessor, AutoModelForObjectDetection
3
+ import torch
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ import numpy as np
6
+ import cv2
7
+ import os
8
+ from utilities import get_path, show_image
9
+
10
+
11
+ class ObjectDetector:
12
+ """
13
+ A class for detecting objects in images using models like Detic and YOLOv5.
14
+
15
+ This class supports loading and using different object detection models to identify objects
16
+ in images and draw bounding boxes around them.
17
+
18
+ Attributes:
19
+ model (torch.nn.Module): The loaded object detection model.
20
+ processor (transformers.AutoImageProcessor): Processor for the Detic model.
21
+ model_name (str): Name of the model used for detection.
22
+ """
23
+
24
+ def __init__(self):
25
+ """
26
+ Initializes the ObjectDetector class with default values.
27
+ """
28
+
29
+ self.model = None
30
+ self.processor = None
31
+ self.model_name = None
32
+
33
+ def load_model(self, model_name='detic', pretrained=True, model_version='yolov5s'):
34
+ """
35
+ Load the specified object detection model.
36
+
37
+ Args:
38
+ model_name (str): Name of the model to load. Options are 'detic' and 'yolov5'.
39
+ pretrained (bool): Boolean indicating if a pretrained model should be used.
40
+ model_version (str): Version of the YOLOv5 model, applicable only when using YOLOv5.
41
+
42
+ Raises:
43
+ ValueError: If an unsupported model name is provided.
44
+ """
45
+
46
+ self.model_name = model_name
47
+ if model_name == 'detic':
48
+ self._load_detic_model(pretrained)
49
+ elif model_name == 'yolov5':
50
+ self._load_yolov5_model(pretrained, model_version)
51
+ else:
52
+ raise ValueError(f"Unsupported model name: {model_name}")
53
+
54
+ def _load_detic_model(self, pretrained):
55
+ """
56
+ Load the Detic model.
57
+
58
+ Args:
59
+ pretrained (bool): If True, load a pretrained model.
60
+ """
61
+
62
+ try:
63
+ model_path = get_path('deformable-detr-detic', 'models')
64
+ self.processor = AutoImageProcessor.from_pretrained(model_path)
65
+ self.model = AutoModelForObjectDetection.from_pretrained(model_path)
66
+ except Exception as e:
67
+ print(f"Error loading Detic model: {e}")
68
+ raise
69
+
70
+ def _load_yolov5_model(self, pretrained, model_version):
71
+ """
72
+ Load the YOLOv5 model.
73
+
74
+ Args:
75
+ pretrained (bool): If True, load a pretrained model.
76
+ model_version (str): Version of the YOLOv5 model.
77
+ """
78
+
79
+ try:
80
+ model_path = get_path('yolov5', 'models')
81
+ if model_path and os.path.exists(model_path):
82
+ self.model = torch.hub.load(model_path, model_version, pretrained=pretrained, source='local')
83
+ else:
84
+ self.model = torch.hub.load('ultralytics/yolov5', model_version, pretrained=pretrained)
85
+ except Exception as e:
86
+ print(f"Error loading YOLOv5 model: {e}")
87
+ raise
88
+
89
+ def process_image(self, image_path):
90
+ """
91
+ Process the image from the given path.
92
+
93
+ Args:
94
+ image_path (str): Path to the image file.
95
+
96
+ Returns:
97
+ Image.Image: Processed image in RGB format.
98
+
99
+ Raises:
100
+ Exception: If an error occurs during image processing.
101
+ """
102
+
103
+ try:
104
+ with Image.open(image_path) as image:
105
+ return image.convert("RGB")
106
+ except Exception as e:
107
+ print(f"Error processing image: {e}")
108
+ raise
109
+
110
+ def detect_objects(self, image, threshold=0.4):
111
+ """
112
+ Detect objects in the given image using the loaded model.
113
+
114
+ Args:
115
+ image (Image.Image): Image in which to detect objects.
116
+ threshold (float): Model detection confidence.
117
+
118
+ Returns:
119
+ tuple: A tuple containing a string representation and a list of detected objects.
120
+
121
+ Raises:
122
+ ValueError: If the model is not loaded or the model name is unsupported.
123
+ """
124
+
125
+ if self.model_name == 'detic':
126
+ return self._detect_with_detic(image, threshold)
127
+ elif self.model_name == 'yolov5':
128
+ return self._detect_with_yolov5(image, threshold)
129
+ else:
130
+ raise ValueError("Model not loaded or unsupported model name")
131
+
132
+ def _detect_with_detic(self, image, threshold):
133
+ """
134
+ Detect objects using the Detic model.
135
+
136
+ Args:
137
+ image (Image.Image): The image in which to detect objects.
138
+ threshold (float): The confidence threshold for detections.
139
+
140
+ Returns:
141
+ tuple: A tuple containing a string representation and a list of detected objects.
142
+ Each object in the list is represented as a tuple (label_name, box_rounded, certainty).
143
+ """
144
+
145
+ inputs = self.processor(images=image, return_tensors="pt")
146
+ outputs = self.model(**inputs)
147
+ target_sizes = torch.tensor([image.size[::-1]])
148
+ results = self.processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=threshold)[0]
150
+
151
+ detected_objects_str = ""
152
+ detected_objects_list = []
153
+ for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
154
+ if score >= threshold:
155
+ label_name = self.model.config.id2label[label.item()]
156
+ box_rounded = [round(coord, 2) for coord in box.tolist()]
157
+ certainty = round(score.item() * 100, 2)
158
+ detected_objects_str += f"{{object: {label_name}, bounding box: {box_rounded}, certainty: {certainty}%}}\n"
159
+ detected_objects_list.append((label_name, box_rounded, certainty))
160
+ return detected_objects_str, detected_objects_list
161
+
162
+ def _detect_with_yolov5(self, image, threshold):
163
+ """
164
+ Detect objects using the YOLOv5 model.
165
+
166
+ Args:
167
+ image (Image.Image): The image in which to detect objects.
168
+ threshold (float): The confidence threshold for detections.
169
+
170
+ Returns:
171
+ tuple: A tuple containing a string representation and a list of detected objects.
172
+ Each object in the list is represented as a tuple (label_name, box_rounded, certainty).
173
+ """
174
+
175
+ cv2_img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
176
+ results = self.model(cv2_img)
177
+
178
+ detected_objects_str = ""
179
+ detected_objects_list = []
180
+ for *bbox, conf, cls in results.xyxy[0]:
181
+ if conf >= threshold:
182
+ label_name = results.names[int(cls)]
183
+ box_rounded = [round(coord.item(), 2) for coord in bbox]
184
+ certainty = round(conf.item() * 100, 2)
185
+ detected_objects_str += f"{{object: {label_name}, bounding box: {box_rounded}, certainty: {certainty}%}}\n"
186
+ detected_objects_list.append((label_name, box_rounded, certainty))
187
+ return detected_objects_str, detected_objects_list
188
+
189
+ def draw_boxes(self, image, detected_objects, show_confidence=True):
190
+ """
191
+ Draw bounding boxes around detected objects in the image.
192
+
193
+ Args:
194
+ image (Image.Image): Image on which to draw.
195
+ detected_objects (list): List of detected objects.
196
+ show_confidence (bool): Whether to show confidence scores.
197
+
198
+ Returns:
199
+ Image.Image: Image with drawn boxes.
200
+ """
201
+
202
+ draw = ImageDraw.Draw(image)
203
+ try:
204
+ font = ImageFont.truetype("arial.ttf", 15)
205
+ except IOError:
206
+ font = ImageFont.load_default()
207
+
208
+ colors = ["red", "green", "blue", "yellow", "purple", "orange"]
209
+ label_color_map = {}
210
+
211
+ for label_name, box, score in detected_objects:
212
+ if label_name not in label_color_map:
213
+ label_color_map[label_name] = colors[len(label_color_map) % len(colors)]
214
+
215
+ color = label_color_map[label_name]
216
+ draw.rectangle(box, outline=color, width=3)
217
+
218
+ label_text = f"{label_name}"
219
+ if show_confidence:
220
+ label_text += f" ({round(score, 2)}%)"
221
+ draw.text((box[0], box[1]), label_text, fill=color, font=font)
222
+
223
+ return image
224
+
225
+
226
+ def detect_and_draw_objects(image_path, model_type='yolov5', threshold=0.2, show_confidence=True):
227
+ """
228
+ Detects objects in an image, draws bounding boxes around them, and returns the processed image and a string description.
229
+
230
+ Args:
231
+ image_path (str): Path to the image file.
232
+ model_type (str): Type of model to use for detection ('yolov5' or 'detic').
233
+ threshold (float): Detection threshold.
234
+ show_confidence (bool): Whether to show confidence scores on the output image.
235
+
236
+ Returns:
237
+ tuple: A tuple containing the processed Image.Image and a string of detected objects.
238
+ """
239
+
240
+ detector = ObjectDetector()
241
+ detector.load_model(model_type)
242
+ image = detector.process_image(image_path)
243
+ detected_objects_string, detected_objects_list = detector.detect_objects(image, threshold=threshold)
244
+ image_with_boxes = detector.draw_boxes(image, detected_objects_list, show_confidence=show_confidence)
245
+ return image_with_boxes, detected_objects_string
246
+
247
+
248
+ # Example usage
249
+ if __name__ == "__main__":
250
+ pass
251
+
252
+ # 'Sample_Images' is the folder containing sample images for the demo.
253
+ image_path = get_path('horse.jpg', 'Sample_Images')
254
+ processed_image, objects_string = detect_and_draw_objects(image_path,
255
+ model_type='detic',
256
+ threshold=0.2,
257
+ show_confidence=False)
258
+ show_image(processed_image)
259
+ print("Detected Objects:", objects_string)
my_model/utilities.py ADDED
@@ -0,0 +1,278 @@
1
+ import pandas as pd
2
+ from collections import Counter
3
+ import json
4
+ import os
5
+ from PIL import Image
6
+ import numpy as np
7
+ import torch
8
+ import matplotlib.pyplot as plt
9
+ from IPython import get_ipython
10
+ import sys
11
+
12
+
13
+ class VQADataProcessor:
14
+ """
15
+ A class to process OKVQA dataset.
16
+
17
+ Attributes:
18
+ questions_file_path (str): The file path for the questions JSON file.
19
+ annotations_file_path (str): The file path for the annotations JSON file.
20
+ questions (list): List of questions extracted from the JSON file.
21
+ annotations (list): List of annotations extracted from the JSON file.
22
+ df_questions (DataFrame): DataFrame created from the questions list.
23
+ df_answers (DataFrame): DataFrame created from the annotations list.
24
+ merged_df (DataFrame): DataFrame resulting from merging questions and answers.
25
+ """
26
+
27
+ def __init__(self, questions_file_path, annotations_file_path):
28
+ """
29
+ Initializes the VQADataProcessor with file paths for questions and annotations.
30
+
31
+ Parameters:
32
+ questions_file_path (str): The file path for the questions JSON file.
33
+ annotations_file_path (str): The file path for the annotations JSON file.
34
+ """
35
+ self.questions_file_path = questions_file_path
36
+ self.annotations_file_path = annotations_file_path
37
+ self.questions, self.annotations = self.read_json_files()
38
+ self.df_questions = pd.DataFrame(self.questions)
39
+ self.df_answers = pd.DataFrame(self.annotations)
40
+ self.merged_df = None
41
+
42
+ def read_json_files(self):
43
+ """
44
+ Reads the JSON files for questions and annotations.
45
+
46
+ Returns:
47
+ tuple: A tuple containing two lists: questions and annotations.
48
+ """
49
+ with open(self.questions_file_path, 'r') as file:
50
+ data = json.load(file)
51
+ questions = data['questions']
52
+
53
+ with open(self.annotations_file_path, 'r') as file:
54
+ data = json.load(file)
55
+ annotations = data['annotations']
56
+
57
+ return questions, annotations
58
+
59
+ @staticmethod
60
+ def find_most_frequent(my_list):
61
+ """
62
+ Finds the most frequent item in a list.
63
+
64
+ Parameters:
65
+ my_list (list): A list of items.
66
+
67
+ Returns:
68
+ The most frequent item in the list. Returns None if the list is empty.
69
+ """
70
+ if not my_list:
71
+ return None
72
+ counter = Counter(my_list)
73
+ most_common = counter.most_common(1)
74
+ return most_common[0][0]
75
+
76
+ def merge_dataframes(self):
77
+ """
78
+ Merges the questions and answers DataFrames on 'question_id' and 'image_id'.
79
+ """
80
+ self.merged_df = pd.merge(self.df_questions, self.df_answers, on=['question_id', 'image_id'])
81
+
82
+ def join_words_with_hyphen(self, sentence):
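+ """
+ Joins the words of a multi-word answer with hyphens (e.g., "fire truck" -> "fire-truck").
+ """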
83
+
84
+ return '-'.join(sentence.split())
85
+
86
+ def process_answers(self):
87
+ """
88
+ Processes the answers by extracting raw and processed answers and finding the most frequent ones.
89
+ """
90
+ if self.merged_df is not None:
91
+ self.merged_df['raw_answers'] = self.merged_df['answers'].apply(lambda x: [ans['raw_answer'] for ans in x])
92
+ self.merged_df['processed_answers'] = self.merged_df['answers'].apply(
93
+ lambda x: [ans['answer'] for ans in x])
94
+ self.merged_df['most_frequent_raw_answer'] = self.merged_df['raw_answers'].apply(self.find_most_frequent)
95
+ self.merged_df['most_frequent_processed_answer'] = self.merged_df['processed_answers'].apply(
96
+ self.find_most_frequent)
97
+ self.merged_df.drop(columns=['answers'], inplace=True)
98
+ else:
99
+ print("DataFrames have not been merged yet.")
100
+
101
+ # Apply the function to the 'most_frequent_processed_answer' column
102
+ self.merged_df['single_word_answers'] = self.merged_df['most_frequent_processed_answer'].apply(
103
+ self.join_words_with_hyphen)
104
+
105
+ def get_processed_data(self):
106
+ """
107
+ Retrieves the processed DataFrame.
108
+
109
+ Returns:
110
+ DataFrame: The processed DataFrame. Returns None if the DataFrame is empty or not processed.
111
+ """
112
+ if self.merged_df is not None:
113
+ return self.merged_df
114
+ else:
115
+ print("DataFrame is empty or not processed yet.")
116
+ return None
117
+
118
+ def save_to_csv(self, df, saved_file_name):
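+ """
+ Saves the given DataFrame to a CSV file. Appends a '.csv' extension when one is not included in
+ saved_file_name, and falls back to 'data.csv' when no name is provided.
+ """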
119
+
120
+ if saved_file_name is not None:
121
+ if ".csv" not in saved_file_name:
122
+ df.to_csv(saved_file_name + ".csv", index=None)  # append the extension instead of joining it as a path component
123
+
124
+ else:
125
+ df.to_csv(saved_file_name, index=None)
126
+
127
+ else:
128
+ df.to_csv("data.csv", index=None)
129
+
130
+ def display_dataframe(self):
131
+ """
132
+ Displays the processed DataFrame.
133
+ """
134
+ if self.merged_df is not None:
135
+ print(self.merged_df)
136
+ else:
137
+ print("DataFrame is empty.")
138
+
139
+
140
+ def process_okvqa_dataset(questions_file_path, annotations_file_path, save_to_csv=False, saved_file_name=None):
141
+ """
142
+ Processes the OK-VQA dataset given the file paths for questions and annotations.
143
+
144
+ Parameters:
145
+ questions_file_path (str): The file path for the questions JSON file.
146
+ annotations_file_path (str): The file path for the annotations JSON file.
147
+
148
+ Returns:
149
+ DataFrame: The processed DataFrame containing merged and processed VQA data.
150
+ """
151
+ # Create an instance of the class
152
+ processor = VQADataProcessor(questions_file_path, annotations_file_path)
153
+
154
+ # Process the data
155
+ processor.merge_dataframes()
156
+ processor.process_answers()
157
+
158
+ # Retrieve the processed DataFrame
159
+ processed_data = processor.get_processed_data()
160
+
161
+ if save_to_csv:
162
+ processor.save_to_csv(processed_data, saved_file_name)
163
+
164
+ return processed_data
165
+
166
+
167
+ def show_image(image):
168
+ """
169
+ Display an image in various environments (Jupyter, PyCharm, Hugging Face Spaces).
170
+ Handles different types of image inputs (file path, PIL Image, numpy array, OpenCV, PyTorch tensor).
171
+
172
+ Args:
173
+ image (str or PIL.Image or numpy.ndarray or torch.Tensor): The image to display.
174
+ """
175
+ in_jupyter = is_jupyter_notebook()
176
+ in_colab = is_google_colab()
177
+
178
+ # Convert image to PIL Image if it's a file path, numpy array, or PyTorch tensor
179
+ if isinstance(image, str):
180
+
181
+ if os.path.isfile(image):
182
+ image = Image.open(image)
183
+ else:
184
+ raise ValueError("File path provided does not exist.")
185
+ elif isinstance(image, np.ndarray):
186
+
187
+ if image.ndim == 3 and image.shape[2] in [3, 4]:
188
+
189
+ image = Image.fromarray(image[..., ::-1] if image.shape[2] == 3 else image)
190
+ else:
191
+
192
+ image = Image.fromarray(image)
193
+ elif torch.is_tensor(image):
194
+
195
+ image = Image.fromarray(image.permute(1, 2, 0).numpy().astype(np.uint8))
196
+
197
+ # Display the image
198
+ if in_jupyter or in_colab:
199
+
200
+ from IPython.display import display
201
+ display(image)
202
+ else:
203
+
204
+ image.show()
205
+
206
+
207
+
208
+ def show_image_with_matplotlib(image):
209
+ if isinstance(image, str):
210
+ image = Image.open(image)
211
+ elif isinstance(image, np.ndarray):
212
+ image = Image.fromarray(image)
213
+ elif torch.is_tensor(image):
214
+ image = Image.fromarray(image.permute(1, 2, 0).numpy().astype(np.uint8))
215
+
216
+ plt.imshow(image)
217
+ plt.axis('off') # Turn off axis numbers
218
+ plt.show()
219
+
220
+
221
+ def is_jupyter_notebook():
222
+ """
223
+ Check if the code is running in a Jupyter notebook.
224
+
225
+ Returns:
226
+ bool: True if running in a Jupyter notebook, False otherwise.
227
+ """
228
+ try:
229
+ from IPython import get_ipython
230
+ if 'IPKernelApp' not in get_ipython().config:
231
+ return False
232
+ if 'ipykernel' in str(type(get_ipython())):
233
+ return True # Running in Jupyter Notebook
234
+ except (NameError, AttributeError):
235
+ return False # Not running in Jupyter Notebook
236
+
237
+ return False # Default to False if none of the above conditions are met
238
+
239
+
240
+ def is_pycharm():
241
+ return 'PYCHARM_HOSTED' in os.environ
242
+
243
+
244
+ def is_google_colab():
245
+ return 'COLAB_GPU' in os.environ or 'google.colab' in sys.modules
246
+
247
+
248
+ def get_path(name, path_type):
249
+ """
250
+ Generates a path for models, images, or data based on the specified type.
251
+
252
+ Args:
253
+ name (str): The name of the model, image, or data folder/file.
254
+ path_type (str): The type of path needed ('models', 'images', or 'data').
255
+
256
+ Returns:
257
+ str: The full path to the specified resource.
258
+ """
259
+ # Get the current working directory (assumed to be inside 'code' folder)
260
+ current_dir = os.getcwd()
261
+
262
+ # Get the directory one level up (the parent directory)
263
+ parent_dir = os.path.dirname(current_dir)
264
+
265
+ # Construct the path to the specified folder
266
+ folder_path = os.path.join(parent_dir, path_type)
267
+
268
+ # Construct the full path to the specific resource
269
+ full_path = os.path.join(folder_path, name)
270
+
271
+ return full_path
272
+
273
+
274
+
275
+ if __name__ == "__main__":
276
+ pass
277
+ #val_data = process_okvqa_dataset('OpenEnded_mscoco_val2014_questions.json', 'mscoco_val2014_annotations.json', save_to_csv=True, saved_file_name="okvqa_val.csv")
278
+ #train_data = process_okvqa_dataset('OpenEnded_mscoco_train2014_questions.json', 'mscoco_train2014_annotations.json', save_to_csv=True, saved_file_name="okvqa_train.csv")