Update app.py
app.py
CHANGED
@@ -232,47 +232,49 @@ def add_new_eval(
 
             #print(reference_set, flush=True)
             if len(line['cqs']) < 3 or type(line['cqs']) is not list: # make sure there are at least 3 cqs
+                num_cqs += 3
                 #return format_warning("Make sure that there are at least 3 questions per intervention, or check that the format is right.")
- [11 deleted lines not shown]
-                winner = np.argmax(sims)
-                # make sure the similarity of the winning reference sentence is at least 0.65
-                if sims[winner] > 0.65:
-                    label = references[indx][winner]['label']
-                else:
-                    label = 'not_able_to_evaluate'
+                continue
+            for cq in line['cqs'][:3]: # here only take the first 3 cqs
+                if type(cq) is not dict:
+                    num_cqs += 1
+                    continue
+                cq_text = cq['cq']
+
+                if METRIC == 'similarity':
+                    sentence_embedding = similarity_model.encode(cq_text)
+                    #reference_embedding = similarity_model.encode(reference_set) # TODO: here have the embeddings directly, do not calculate each time
+                    sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
 
- [14 deleted lines not shown]
+                    winner = np.argmax(sims)
+                    # make sure the similarity of the winning reference sentence is at least 0.65
+                    if sims[winner] > 0.65:
+                        label = references[indx][winner]['label']
+                    else:
+                        label = 'not_able_to_evaluate'
+
+                if METRIC == 'gemma':
+                    prompts = get_prompts(cq_text, '\n'.join(reference_set))
+                    winner = run_model(model, tokenizer, prompts['compare'])
+                    try: # here make sure the output is the id of a reference cq
+                        if winner.strip() != 'Similar reference not found.':
+                            label = references[index][int(winner)]['label']
+                        else:
+                            label = 'not_able_to_evaluate'
                             print(winner, flush=True)
- [7 deleted lines not shown]
+                    except IndexError:
+                        label = 'evaluation_issue'
+                        print(winner, flush=True)
+                    except ValueError:
+                        label = 'evaluation_issue'
+                        print(winner, flush=True)
+
+                #print(label, flush=True)
+                num_cqs += 1
+                if label == 'Useful':
+                    intervention_score += 1/3
+                if label == 'not_able_to_evaluate':
+                    nae += 1
 
             #print(id_to_eval, intervention_score, flush=True)
             scores.append(intervention_score)
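For readers skimming the commit, the similarity branch reduces to the standalone sketch below. It assumes a sentence-transformers model (v3+, which provides encode and similarity); the model name, the reference list, and the label_cq helper are illustrative placeholders, not values taken from app.py.

# Minimal sketch of the similarity-based labelling step introduced above.
# Assumes sentence-transformers >= 3.0; model name and references are illustrative only.
import numpy as np
from sentence_transformers import SentenceTransformer

similarity_model = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model

# Hypothetical reference CQs, each carrying a gold usefulness label.
references = [
    {"cq": "What evidence supports this claim?", "label": "Useful"},
    {"cq": "Is the cited statistic from a reliable source?", "label": "Useful"},
    {"cq": "What colour is the speaker's shirt?", "label": "Unhelpful"},
]
reference_embeddings = similarity_model.encode([r["cq"] for r in references])

def label_cq(cq_text: str) -> str:
    """Label a generated CQ by its closest reference, requiring similarity > 0.65."""
    sentence_embedding = similarity_model.encode(cq_text)
    sims = similarity_model.similarity(sentence_embedding, reference_embeddings).tolist()[0]
    winner = int(np.argmax(sims))
    if sims[winner] > 0.65:
        return references[winner]["label"]
    return "not_able_to_evaluate"

print(label_cq("Which evidence backs up this claim?"))  # likely 'Useful' for a close paraphrase

In the actual loop, only the first three CQs per intervention are scored, and each CQ labelled Useful adds 1/3 to that intervention's score, so three useful questions yield a full point.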