Monimoy committed on
Commit
a0852ae
·
verified ·
1 Parent(s): 4e8f700

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -147,11 +147,12 @@ def predict1(image_input, question):
147
  with torch.no_grad():
148
  # Get image embeddings
149
  image_embeddings = image_encoder(image)
150
- projected_image_embeddings = model.image_projection(image_embeddings)
151
 
152
  # Reshape image embeddings to (batch_size, 1, phi3_embed_dim)
153
- projected_image_embeddings = projected_image_embeddings.unsqueeze(1)
154
-
 
155
  # Concatenate along the sequence dimension (dim=1)
156
  extended_attention_mask = torch.cat([torch.ones(projected_image_embeddings.shape[:2], device=encoded["attention_mask"].device), encoded["attention_mask"]], dim=1)
157
  extended_input_ids = torch.cat([torch.zeros(projected_image_embeddings.shape[:2], dtype=torch.long, device=encoded["input_ids"].device), encoded["input_ids"]], dim=1)
 
147
  with torch.no_grad():
148
  # Get image embeddings
149
  image_embeddings = image_encoder(image)
150
+ #projected_image_embeddings = model.image_projection(image_embeddings)
151
 
152
  # Reshape image embeddings to (batch_size, 1, phi3_embed_dim)
153
+ #projected_image_embeddings = projected_image_embeddings.unsqueeze(1)
154
+ projected_image_embeddings = image_embeddings.unsqueeze(1)
155
+
156
  # Concatenate along the sequence dimension (dim=1)
157
  extended_attention_mask = torch.cat([torch.ones(projected_image_embeddings.shape[:2], device=encoded["attention_mask"].device), encoded["attention_mask"]], dim=1)
158
  extended_input_ids = torch.cat([torch.zeros(projected_image_embeddings.shape[:2], dtype=torch.long, device=encoded["input_ids"].device), encoded["input_ids"]], dim=1)