sanjanatule committed on
Commit
d908a2b
1 Parent(s): 1661c47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -0
app.py CHANGED
@@ -13,6 +13,7 @@ tokenizer = AutoTokenizer.from_pretrained(phi_model_name, trust_remote_code=Tru
13
  processor = AutoProcessor.from_pretrained(clip_model_name)
14
  tokenizer.pad_token = tokenizer.eos_token
15
  IMAGE_TOKEN_ID = 23893 # token for word comment
 
16
  device = "cuda" if torch.cuda.is_available() else "cpu"
17
  clip_embed = 768
18
  phi_embed = 2560
@@ -83,6 +84,12 @@ def model_generate_ans(img=None,img_audio=None,val_q=None):
83
  val_combined_embeds.append(val_q_embeds)
84
 
85
 
 
 
 
 
 
 
86
  val_combined_embeds = torch.cat(val_combined_embeds,dim=1)
87
  predicted_caption = merged_model.generate(inputs_embeds=val_combined_embeds,
88
  max_new_tokens=max_generate_length,
 
13
  processor = AutoProcessor.from_pretrained(clip_model_name)
14
  tokenizer.pad_token = tokenizer.eos_token
15
  IMAGE_TOKEN_ID = 23893 # token for word comment
16
+ QA_TOKEN_ID = 50295 # token for qa
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
  clip_embed = 768
19
  phi_embed = 2560
 
84
  val_combined_embeds.append(val_q_embeds)
85
 
86
 
87
+ if img_audio is not None or len(val_q) != 0: # add QA Token
88
+
89
+ QA_token_tensor = torch.tensor(QA_TOKEN_ID).to(device)
90
+ QA_token_embeds = merged_model.model.embed_tokens(QA_token_tensor).unsqueeze(0).unsqueeze(0)
91
+ val_combined_embeds.append(QA_token_embeds)
92
+
93
  val_combined_embeds = torch.cat(val_combined_embeds,dim=1)
94
  predicted_caption = merged_model.generate(inputs_embeds=val_combined_embeds,
95
  max_new_tokens=max_generate_length,