PyTorch
English
Tevatron
phi3_v
vidore
custom_code
MrLight committed on
Commit
137fda9
1 Parent(s): cda3e23

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +24 -11
README.md CHANGED
@@ -39,9 +39,10 @@ def get_embedding(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor)
39
  ### Encode Text Query
40
 
41
  ```python
42
- queries = ["query: Where can we see Llama?", "query: What is LLaMA model?"]
43
  query_inputs = processor(queries, return_tensors="pt", padding="longest", max_length=128, truncation=True).to('cuda:0')
44
- output = model(**query_inputs, return_dict=True, output_hidden_states=True)
 
45
  query_embeddings = get_embedding(output.hidden_states[-1], query_inputs["attention_mask"])
46
  ```
47
 
@@ -53,8 +54,8 @@ import requests
53
  from io import BytesIO
54
 
55
  # URLs of the images
56
- url1 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v1.0/blob/main/animal-llama.png"
57
- url2 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v1.0/blob/main/meta-llama.png"
58
 
59
  # Download and open images
60
  response1 = requests.get(url1)
@@ -64,21 +65,30 @@ passage_image1 = Image.open(BytesIO(response1.content))
64
  passage_image2 = Image.open(BytesIO(response2.content))
65
 
66
  passage_images = [passage_image1, passage_image2]
67
- passage_prompts = ["\nWhat is shown in this image?</s>", "\nWhat is shown in this image?</s>"]
68
 
69
  # Process inputs and get embeddings
70
  passage_inputs = processor(passage_prompts, images=passage_images, return_tensors="pt", padding="longest", max_length=4096, truncation=True).to('cuda:0')
71
- output = model(**passage_inputs, return_dict=True, output_hidden_states=True)
 
 
 
 
72
  doc_embeddings = get_embedding(output.hidden_states[-1], passage_inputs["attention_mask"])
 
73
  ```
74
 
75
  ### Compute Similarity
76
 
77
  ```python
78
  from torch.nn.functional import cosine_similarity
 
 
79
 
80
- similarities = cosine_similarity(query_embeddings, doc_embeddings)
81
- print(similarities)
 
 
82
  ```
83
 
84
  ### Encode Document Text
@@ -90,9 +100,12 @@ passage_prompts = [
90
  ]
91
 
92
  passage_inputs = processor(passage_prompts, images=None, return_tensors="pt", padding="longest", max_length=4096, truncation=True).to('cuda:0')
93
- output = model(**passage_inputs, return_dict=True, output_hidden_states=True)
 
94
  doc_embeddings = get_embedding(output.hidden_states[-1], passage_inputs["attention_mask"])
95
 
96
- similarities = cosine_similarity(query_embeddings, doc_embeddings)
97
- print(similarities)
 
 
98
  ```
 
39
  ### Encode Text Query
40
 
41
  ```python
42
+ queries = ["query: Where can we see Llama?</s>", "query: What is LLaMA model?</s>"]
43
  query_inputs = processor(queries, return_tensors="pt", padding="longest", max_length=128, truncation=True).to('cuda:0')
44
+ with torch.no_grad():
45
+     output = model(**query_inputs, return_dict=True, output_hidden_states=True)
46
  query_embeddings = get_embedding(output.hidden_states[-1], query_inputs["attention_mask"])
47
  ```
48
 
 
54
  from io import BytesIO
55
 
56
  # URLs of the images
57
+ url1 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v1.0/resolve/main/animal-llama.png"
58
+ url2 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v1.0/resolve/main/meta-llama.png"
59
 
60
  # Download and open images
61
  response1 = requests.get(url1)
 
65
  passage_image2 = Image.open(BytesIO(response2.content))
66
 
67
  passage_images = [passage_image1, passage_image2]
68
+ passage_prompts = ["<|image_1|>\nWhat is shown in this image?</s>", "<|image_2|>\nWhat is shown in this image?</s>"]
69
 
70
  # Process inputs and get embeddings
71
  passage_inputs = processor(passage_prompts, images=passage_images, return_tensors="pt", padding="longest", max_length=4096, truncation=True).to('cuda:0')
72
+ passage_inputs['input_ids'] = passage_inputs['input_ids'].squeeze(0)
73
+ passage_inputs['attention_mask'] = passage_inputs['attention_mask'].squeeze(0)
74
+ passage_inputs['image_sizes'] = passage_inputs['image_sizes'].squeeze(0)
75
+ with torch.no_grad():
76
+     output = model(**passage_inputs, return_dict=True, output_hidden_states=True)
77
  doc_embeddings = get_embedding(output.hidden_states[-1], passage_inputs["attention_mask"])
78
+
79
  ```
80
 
81
  ### Compute Similarity
82
 
83
  ```python
84
  from torch.nn.functional import cosine_similarity
85
+ num_queries = query_embeddings.size(0)
86
+ num_passages = doc_embeddings.size(0)
87
 
88
+ for i in range(num_queries):
89
+     query_embedding = query_embeddings[i].unsqueeze(0)
90
+     similarities = cosine_similarity(query_embedding, doc_embeddings)
91
+     print(f"Similarities for Query {i+1}: {similarities.cpu().float().numpy()}")
92
  ```
93
 
94
  ### Encode Document Text
 
100
  ]
101
 
102
  passage_inputs = processor(passage_prompts, images=None, return_tensors="pt", padding="longest", max_length=4096, truncation=True).to('cuda:0')
103
+ with torch.no_grad():
104
+     output = model(**passage_inputs, return_dict=True, output_hidden_states=True)
105
  doc_embeddings = get_embedding(output.hidden_states[-1], passage_inputs["attention_mask"])
106
 
107
+ for i in range(num_queries):
108
+     query_embedding = query_embeddings[i].unsqueeze(0)
109
+     similarities = cosine_similarity(query_embedding, doc_embeddings)
110
+     print(f"Similarities for Query {i+1}: {similarities.cpu().float().numpy()}")
111
  ```