Nemil committed on
Commit 7927ce3 • 1 Parent(s): 8c2556c

Upload app.py

Files changed (1)
  1. app.py +53 -48
app.py CHANGED
@@ -7,7 +7,7 @@ def install(package):
 install("evaluate")
 install("jiwer")
 install("huggingface_hub")
-install("gradio==3.36.0")
+install("gradio")
 install("bitsandbytes")
 install("git+https://github.com/huggingface/transformers.git")
 install("git+https://github.com/huggingface/peft.git")
@@ -17,8 +17,6 @@ install("safetensors")
 install("torch")
 install("xformers")
 install("datasets")
-install("stable-diffusion")
-install("accelerate")
 
 from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
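Worth noting for this hunk: `device_map="auto"` and 4-bit quantized loading in `_load_model` further down still need accelerate at runtime, so dropping `install("accelerate")` relies on it arriving as a transitive dependency (peft, for example, depends on it). If that assumption ever breaks, restoring the dropped line is the fix:

    install("accelerate")  # required by device_map="auto" / 4-bit loading unless pulled in transitively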
@@ -132,7 +130,6 @@ from peft import (
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoConfig
 from peft import LoraConfig, get_peft_model
 
-
 os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
 class Social_Media_Captioner:
@@ -157,52 +154,58 @@ class Social_Media_Captioner:
 
 
     def _load_model(self):
-        self.bnb_config = BitsAndBytesConfig(
-            load_in_4bit = True,
-            bnb_4bit_use_double_quant = True,
-            bnb_4bit_quant_type= "nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16,
-        )
-        self.model = AutoModelForCausalLM.from_pretrained(
-            self.MODEL_NAME,
-            device_map = "auto",
-            trust_remote_code = True,
-            quantization_config = self.bnb_config
-        )
-
-        # Defining the tokenizers
-        self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
-        self.tokenizer.pad_token = self.tokenizer.eos_token
-
-        if self.use_finetuned:
-            # LORA Config Model
-            self.lora_config = LoraConfig(
-                r=16,
-                lora_alpha=32,
-                target_modules=["query_key_value"],
-                lora_dropout=0.05,
-                bias="none",
-                task_type="CAUSAL_LM"
-            )
-            self.model = get_peft_model(self.model, self.lora_config)
-
-            # Fitting the adapters
-            self.peft_config = PeftConfig.from_pretrained(self.peft_model_name)
-            self.model = AutoModelForCausalLM.from_pretrained(
-                self.peft_config.base_model_name_or_path,
-                return_dict = True,
-                quantization_config = self.bnb_config,
-                device_map= "auto",
-                trust_remote_code = True
-            )
-            self.model = PeftModel.from_pretrained(self.model, self.peft_model_name)
-
-            # Defining the tokenizers
-            self.tokenizer = AutoTokenizer.from_pretrained(self.peft_config.base_model_name_or_path)
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-
-        self.model_loaded = True
-        print("Model Loaded successfully")
+        try:
+            self.bnb_config = BitsAndBytesConfig(
+                load_in_4bit = True,
+                bnb_4bit_use_double_quant = True,
+                bnb_4bit_quant_type= "nf4",
+                bnb_4bit_compute_dtype=torch.bfloat16,
+            )
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.MODEL_NAME,
+                device_map = "auto",
+                trust_remote_code = True,
+                quantization_config = self.bnb_config
+            )
+
+            # Defining the tokenizers
+            self.tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            if self.use_finetuned:
+                # LORA Config Model
+                self.lora_config = LoraConfig(
+                    r=16,
+                    lora_alpha=32,
+                    target_modules=["query_key_value"],
+                    lora_dropout=0.05,
+                    bias="none",
+                    task_type="CAUSAL_LM"
+                )
+                self.model = get_peft_model(self.model, self.lora_config)
+
+                # Fitting the adapters
+                self.peft_config = PeftConfig.from_pretrained(self.peft_model_name)
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    self.peft_config.base_model_name_or_path,
+                    return_dict = True,
+                    quantization_config = self.bnb_config,
+                    device_map= "auto",
+                    trust_remote_code = True
+                )
+                self.model = PeftModel.from_pretrained(self.model, self.peft_model_name)
+
+                # Defining the tokenizers
+                self.tokenizer = AutoTokenizer.from_pretrained(self.peft_config.base_model_name_or_path)
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            self.model_loaded = True
+            print("Model Loaded successfully")
+
+        except Exception as e:
+            print(e)
+            self.model_loaded = False
+
 
     def inference(self, input_text: str, use_cached=True, cache_generation=True) -> str | None:
         if not self.model_loaded:
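The net effect of the rewritten finetuned path is easier to see outside the class. A condensed standalone sketch of the same flow (ADAPTER_REPO is a placeholder; the actual `MODEL_NAME` and `peft_model_name` values are defined elsewhere in app.py and are not part of this diff):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
    from peft import PeftConfig, PeftModel

    ADAPTER_REPO = "your-user/your-peft-adapter"  # placeholder, not the adapter repo app.py uses

    # 4-bit NF4 quantization, matching the BitsAndBytesConfig in the diff.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # Resolve the base model from the adapter's config, load it quantized, then attach the adapter.
    peft_config = PeftConfig.from_pretrained(ADAPTER_REPO)
    model = AutoModelForCausalLM.from_pretrained(
        peft_config.base_model_name_or_path,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(model, ADAPTER_REPO)

    tokenizer = AutoTokenizer.from_pretrained(peft_config.base_model_name_or_path)
    tokenizer.pad_token = tokenizer.eos_token

Note that the new try/except only flips `self.model_loaded` on failure, so a broken load surfaces later in `inference()` (which checks that flag) rather than at startup.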
@@ -246,7 +249,7 @@ class Social_Media_Captioner:
         raise Exception("Enter a valid input text to generate a valid prompt")
 
         return f"""
-        Convert the given image description to a appropriate metaphoric caption
+        Convert the given image description to social media worthy metaphoric caption
         Description: {input_text}
         Caption:
         """.strip()
@@ -295,18 +298,20 @@ class Captions:
         image_description = self.image_to_text._generate_description(image, max_length=max_length_GIT)
         captions = self.LLM.inference(image_description, use_cached=use_cached_LLM, cache_generation=cache_generation_LLM)
         return captions
-
+
 caption_generator = Captions()
 
 import gradio as gr
 
 def setup(image):
+    # Assuming `caption_generator.generate_captions` is your function to generate captions.
+    # This is just a placeholder for your actual caption generation logic.
     return caption_generator.generate_captions(image = image)
 
 iface = gr.Interface(
     fn=setup,
-    inputs=gr.inputs.Image(type="pil", label="Upload Image"),
-    outputs=gr.outputs.Textbox(label="Caption")
+    inputs=gr.Image(type="pil", label="Upload Image"),  # Updated usage here
+    outputs="text"  # Simplified usage here
 )
 
 iface.launch()
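For context on this hunk: the `gr.inputs` / `gr.outputs` namespaces were deprecated in Gradio 3.x and removed in 4.x, so once the gradio==3.36.0 pin is dropped (first hunk) the top-level components are required. If the "Caption" label is worth keeping, the same migration can use the top-level Textbox rather than the "text" shortcut; a sketch of that alternative, not what this commit does:

    import gradio as gr

    def setup(image):
        # Stand-in for caption_generator.generate_captions(image=image) from app.py.
        return "example caption"

    iface = gr.Interface(
        fn=setup,
        inputs=gr.Image(type="pil", label="Upload Image"),
        outputs=gr.Textbox(label="Caption"),  # keeps the label that the "text" shortcut drops
    )
    iface.launch()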
 