Blanca committed
Commit d5e45a8 · verified · 1 Parent(s): 315e48e

Update app.py

Files changed (1)
  1. app.py +40 -38
app.py CHANGED
@@ -232,47 +232,49 @@ def add_new_eval(
 
         #print(reference_set, flush=True)
         if len(line['cqs']) < 3 or type(line['cqs']) is not list: # make sure there are at least 3 cqs
+            num_cqs += 3
             #return format_warning("Make sure that there are at least 3 questions per intervention, or check that the format is right.")
-            intervention_score = 0
-            nae = 3
-        else:
-            for cq in line['cqs'][:3]: # here only take the first 3 cqs
-                cq_text = cq['cq']
-
-                if METRIC == 'similarity':
-                    sentence_embedding = similarity_model.encode(cq_text)
-                    #reference_embedding = similarity_model.encode(reference_set) # TODO: here have the embeddings directly, do no calculate each time
-                    sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
-
-                    winner = np.argmax(sims)
-                    # make sure the similarity of the winning reference sentence is at least 0.65
-                    if sims[winner] > 0.65:
-                        label = references[indx][winner]['label']
-                    else:
-                        label = 'not_able_to_evaluate'
+            continue
+        for cq in line['cqs'][:3]: # here only take the first 3 cqs
+            if type(cq) is not dict:
+                num_cqs += 1
+                continue
+            cq_text = cq['cq']
+
+            if METRIC == 'similarity':
+                sentence_embedding = similarity_model.encode(cq_text)
+                #reference_embedding = similarity_model.encode(reference_set) # TODO: here have the embeddings directly, do no calculate each time
+                sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
 
-                if METRIC == 'gemma':
-                    prompts = get_prompts(cq_text, '\n'.join(reference_set))
-                    winner = run_model(model, tokenizer, prompts['compare'])
-                    try: # here make sure the output is the id of a reference cq
-                        if winner.strip() != 'Similar reference not found.':
-                            label = references[index][int(winner)]['label']
-                        else:
-                            label = 'not_able_to_evaluate'
-                        print(winner, flush=True)
-                    except IndexError:
-                        label = 'evaluation_issue'
-                        print(winner, flush=True)
-                    except ValueError:
-                        label = 'evaluation_issue'
+                winner = np.argmax(sims)
+                # make sure the similarity of the winning reference sentence is at least 0.65
+                if sims[winner] > 0.65:
+                    label = references[indx][winner]['label']
+                else:
+                    label = 'not_able_to_evaluate'
+
+            if METRIC == 'gemma':
+                prompts = get_prompts(cq_text, '\n'.join(reference_set))
+                winner = run_model(model, tokenizer, prompts['compare'])
+                try: # here make sure the output is the id of a reference cq
+                    if winner.strip() != 'Similar reference not found.':
+                        label = references[index][int(winner)]['label']
+                    else:
+                        label = 'not_able_to_evaluate'
                     print(winner, flush=True)
-
-                #print(label, flush=True)
-                num_cqs += 1
-                if label == 'Useful':
-                    intervention_score += 1/3
-                if label == 'not_able_to_evaluate':
-                    nae += 1
+                except IndexError:
+                    label = 'evaluation_issue'
+                    print(winner, flush=True)
+                except ValueError:
+                    label = 'evaluation_issue'
+                    print(winner, flush=True)
+
+            #print(label, flush=True)
+            num_cqs += 1
+            if label == 'Useful':
+                intervention_score += 1/3
+            if label == 'not_able_to_evaluate':
+                nae += 1
 
         #print(id_to_eval, intervention_score, flush=True)
         scores.append(intervention_score)
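
For readers who want to try the similarity metric outside the Space, the sketch below reproduces the scoring path from the loop above: encode a generated critical question, find its closest reference question, and accept the match only if the similarity score exceeds 0.65. This is a minimal sketch; the model name, the reference entries, and the example questions are illustrative placeholders, not values taken from this repository.

# Minimal standalone sketch of the similarity-based scoring path in app.py.
# Assumptions: the embedding model, the reference list, and the example
# questions below are placeholders; the real app loads its own references.
import numpy as np
from sentence_transformers import SentenceTransformer

similarity_model = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model

# Hypothetical reference critical questions, each with a usefulness label.
references = [
    {"cq": "What evidence supports this claim?", "label": "Useful"},
    {"cq": "Could the speaker be biased on this issue?", "label": "Useful"},
    {"cq": "Is the question related to the intervention at all?", "label": "Invalid"},
]
reference_set = [r["cq"] for r in references]
reference_embeddings = similarity_model.encode(reference_set)

def label_cq(cq_text):
    """Label a generated question with the label of its closest reference."""
    sentence_embedding = similarity_model.encode(cq_text)
    sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
    winner = int(np.argmax(sims))
    # As in app.py, the best match must score above 0.65 to count.
    return references[winner]["label"] if sims[winner] > 0.65 else "not_able_to_evaluate"

# Each intervention contributes 1/3 per 'Useful' question among its first three.
cqs = ["What evidence is there for the claim?", "Why?", "Might the source be biased?"]
intervention_score = sum(1 / 3 for cq in cqs[:3] if label_cq(cq) == "Useful")
print(round(intervention_score, 2), flush=True)

The 0.65 threshold, the 'Useful' and 'not_able_to_evaluate' labels, and the 1/3-per-question weighting come from the loop in app.py; everything else here is scaffolding so the check can run on its own.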