Tonic committed on
Commit
fcbecda
1 Parent(s): ace4204

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -62,11 +62,11 @@ def compute_embeddings(selected_task, input_text):
62
  max_length = 2042
63
  processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
64
 
65
- batch_dict = self.tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
66
- batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
67
- batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
68
  batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
69
- outputs = self.model(**batch_dict)
70
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
71
  embeddings = F.normalize(embeddings, p=2, dim=1)
72
  embeddings_list = embeddings.detach().cpu().numpy().tolist()
@@ -80,10 +80,10 @@ def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, ext
80
  print(f"Selected task not found: {selected_task}")
81
  return f"Error: Task '{selected_task}' not found. Please select a valid task."
82
  # Compute embeddings for each sentence
83
- embeddings1 = self.compute_embeddings(self.selected_task, sentence1)
84
- embeddings2 = self.compute_embeddings(self.selected_task, sentence2)
85
- embeddings3 = self.compute_embeddings(self.selected_task, extra_sentence1)
86
- embeddings4 = self.compute_embeddings(self.selected_task, extra_sentence2)
87
 
88
  # Convert embeddings to tensors
89
  embeddings_tensor1 = torch.tensor(embeddings1).to(device).half()
@@ -92,9 +92,9 @@ def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, ext
92
  embeddings_tensor4 = torch.tensor(embeddings4).to(device).half()
93
 
94
  # Compute cosine similarity
95
- similarity1 = self._compute_cosine_similarity(embeddings1, embeddings2)
96
- similarity2 = self._compute_cosine_similarity(embeddings1, embeddings3)
97
- similarity3 = self._compute_cosine_similarity(embeddings1, embeddings4)
98
 
99
  # Free memory
100
  free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
@@ -102,7 +102,7 @@ def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, ext
102
  return similarity1, similarity2, similarity3
103
 
104
  # @spaces.GPU
105
- def _compute_cosine_similarity(emb1, emb2):
106
  tensor1 = torch.tensor(emb1).to(device).half()
107
  tensor2 = torch.tensor(emb2).to(device).half()
108
  similarity = F.cosine_similarity(tensor1, tensor2).item()
 
62
  max_length = 2042
63
  processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
64
 
65
+ batch_dict = tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
66
+ batch_dict['input_ids'] = [input_ids + [tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
67
+ batch_dict = tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
68
  batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
69
+ outputs = model(**batch_dict)
70
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
71
  embeddings = F.normalize(embeddings, p=2, dim=1)
72
  embeddings_list = embeddings.detach().cpu().numpy().tolist()
 
80
  print(f"Selected task not found: {selected_task}")
81
  return f"Error: Task '{selected_task}' not found. Please select a valid task."
82
  # Compute embeddings for each sentence
83
+ embeddings1 = compute_embeddings(selected_task, sentence1)
84
+ embeddings2 = compute_embeddings(selected_task, sentence2)
85
+ embeddings3 = compute_embeddings(selected_task, extra_sentence1)
86
+ embeddings4 = compute_embeddings(selected_task, extra_sentence2)
87
 
88
  # Convert embeddings to tensors
89
  embeddings_tensor1 = torch.tensor(embeddings1).to(device).half()
 
92
  embeddings_tensor4 = torch.tensor(embeddings4).to(device).half()
93
 
94
  # Compute cosine similarity
95
+ similarity1 = compute_cosine_similarity(embeddings1, embeddings2)
96
+ similarity2 = compute_cosine_similarity(embeddings1, embeddings3)
97
+ similarity3 = compute_cosine_similarity(embeddings1, embeddings4)
98
 
99
  # Free memory
100
  free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
 
102
  return similarity1, similarity2, similarity3
103
 
104
  # @spaces.GPU
105
+ def compute_cosine_similarity(emb1, emb2):
106
  tensor1 = torch.tensor(emb1).to(device).half()
107
  tensor2 = torch.tensor(emb2).to(device).half()
108
  similarity = F.cosine_similarity(tensor1, tensor2).item()