Spaces:
Sleeping
Sleeping
Commit
•
d908a2b
1
Parent(s):
1661c47
Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,7 @@ tokenizer = AutoTokenizer.from_pretrained(phi_model_name, trust_remote_code=Tru
|
|
13 |
processor = AutoProcessor.from_pretrained(clip_model_name)
|
14 |
tokenizer.pad_token = tokenizer.eos_token
|
15 |
IMAGE_TOKEN_ID = 23893 # token for word comment
|
|
|
16 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
17 |
clip_embed = 768
|
18 |
phi_embed = 2560
|
@@ -83,6 +84,12 @@ def model_generate_ans(img=None,img_audio=None,val_q=None):
|
|
83 |
val_combined_embeds.append(val_q_embeds)
|
84 |
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
val_combined_embeds = torch.cat(val_combined_embeds,dim=1)
|
87 |
predicted_caption = merged_model.generate(inputs_embeds=val_combined_embeds,
|
88 |
max_new_tokens=max_generate_length,
|
|
|
13 |
processor = AutoProcessor.from_pretrained(clip_model_name)
|
14 |
tokenizer.pad_token = tokenizer.eos_token
|
15 |
IMAGE_TOKEN_ID = 23893 # token for word comment
|
16 |
+
QA_TOKEN_ID = 50295 # token for qa
|
17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
clip_embed = 768
|
19 |
phi_embed = 2560
|
|
|
84 |
val_combined_embeds.append(val_q_embeds)
|
85 |
|
86 |
|
87 |
+
if img_audio is not None or len(val_q) != 0: # add QA Token
|
88 |
+
|
89 |
+
QA_token_tensor = torch.tensor(QA_TOKEN_ID).to(device)
|
90 |
+
QA_token_embeds = merged_model.model.embed_tokens(QA_token_tensor).unsqueeze(0).unsqueeze(0)
|
91 |
+
val_combined_embeds.append(QA_token_embeds)
|
92 |
+
|
93 |
val_combined_embeds = torch.cat(val_combined_embeds,dim=1)
|
94 |
predicted_caption = merged_model.generate(inputs_embeds=val_combined_embeds,
|
95 |
max_new_tokens=max_generate_length,
|