kaikaidai committed
Commit f6a6b20 (verified) · Parent: 08422fa

New models appear more often

Files changed (1): app.py  +15 -12
app.py CHANGED
@@ -4,18 +4,17 @@ import random
 from collections import defaultdict
 from datetime import datetime
 import hashlib
+import gradio as gr
 
 from dotenv import load_dotenv
-
 load_dotenv()
 
-import gradio as gr
 from gen_api_answer import (
     get_model_response,
     parse_model_response,
     prometheus_parse_model_response,
     atla_parse_model_response,
-    flow_judge_parse_model_response,
+    flow_judge_parse_model_response
 )
 
 from random_sample_generation import (
@@ -24,7 +23,9 @@ from random_sample_generation import (
     generate_ai_response
 )
 from db import add_vote, create_db_connection, get_votes
+
 from utils import Vote
+
 from common import (
     POLICY_CONTENT,
     ACKNOWLEDGEMENTS,
@@ -717,18 +718,21 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
             else:
                 model_a, model_b = other_model, atla_model
         else:
-            # For subsequent games, new model appears 30% of the time
-            if random.random() < 0.3:
-                other_models = [m for m in active_models if m != atla_model]
+            # For subsequent games, new models appears 40% of the time
+            if random.random() < 0.4:
+                # Randomly choose between new models
+                new_model = random.choice(["Atla-8B-preview", "Flow-Judge-1.0"])
+                other_models = [m for m in active_models if m not in [new_model, "Atla-8B-preview", "Flow-Judge-1.0"]]
                 other_model = random.choice(other_models)
 
                 if random.random() < 0.5:
-                    model_a, model_b = atla_model, other_model
+                    model_a, model_b = new_model, other_model
                 else:
-                    model_a, model_b = other_model, atla_model
+                    model_a, model_b = other_model, new_model
             else:
-                non_atla_models = [m for m in active_models if m != atla_model]
-                model1, model2 = random.sample(non_atla_models, 2)
+                # For other cases, exclude both Atla and Flow-Judge
+                non_special_models = [m for m in active_models if m not in ["Atla-8B-preview", "Flow-Judge-1.0"]]
+                model1, model2 = random.sample(non_special_models, 2)
                 model_a, model_b = (model1, model2) if random.random() < 0.5 else (model2, model1)
 
         # Get responses from models
@@ -750,9 +754,8 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
         is_prometheus_b = (model_data.get(model_b)['organization'] == 'Prometheus')
         is_atla_a = (model_data.get(model_a)['organization'] == 'Atla')
        is_atla_b = (model_data.get(model_b)['organization'] == 'Atla')
-
         is_flow_judge_a = (model_data.get(model_a)['organization'] == 'Flow AI')
-        is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
+        is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
 
         if is_prometheus_a:
             score_a_val, critique_a_val = prometheus_parse_model_response(response_a)
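
Editor's note: the substantive change is the matchup-selection branch around line 720. Before this commit, games after the first guaranteed the single Atla model a slot 30% of the time; now one of two new judges (Atla-8B-preview or Flow-Judge-1.0) is picked at random and guaranteed a slot 40% of the time, and both new models are excluded from the remaining 60% of matchups. The sketch below is a minimal standalone version of that "subsequent games" branch only, assuming active_models is a list of judge-model names with at least two non-new entries; choose_pair and NEW_MODELS are illustrative names, not identifiers from app.py.

import random

# Model names are taken from the diff; everything else here is illustrative.
NEW_MODELS = ["Atla-8B-preview", "Flow-Judge-1.0"]

def choose_pair(active_models: list[str]) -> tuple[str, str]:
    """Pick two judges for a matchup, mirroring the post-commit branch."""
    if random.random() < 0.4:
        # 40% of games: one of the new models is guaranteed a slot
        # (so each new model appears in roughly 20% of matchups).
        new_model = random.choice(NEW_MODELS)
        # Opponents exclude both new models, so the two never face each other.
        other_models = [m for m in active_models if m not in NEW_MODELS]
        other_model = random.choice(other_models)
        # Randomize which side of the arena the new model lands on.
        return (new_model, other_model) if random.random() < 0.5 else (other_model, new_model)
    # Remaining 60%: an ordinary matchup between two established models.
    non_special_models = [m for m in active_models if m not in NEW_MODELS]
    model1, model2 = random.sample(non_special_models, 2)
    return (model1, model2) if random.random() < 0.5 else (model2, model1)

Sampling choose_pair repeatedly with a handful of placeholder model names should show one of the two new models in about 40% of pairs, versus the 30% the old branch gave to the single Atla model, which is what the commit message "New models appear more often" refers to.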
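The last hunk restores the is_flow_judge_b flag so that the Flow AI judge's responses on side B are routed to flow_judge_parse_model_response rather than the generic parser. Below is a condensed, hypothetical helper illustrating the dispatch those organization flags drive; parse_for_model and the final fallback to parse_model_response are assumptions, while the parser functions themselves come from the gen_api_answer imports at the top of app.py (so this runs only inside the Space's repo).

from gen_api_answer import (
    parse_model_response,
    prometheus_parse_model_response,
    atla_parse_model_response,
    flow_judge_parse_model_response,
)

def parse_for_model(model_name: str, response: str, model_data: dict):
    """Route a raw judge response to the parser for its organization.

    app.py computes per-side flags (is_prometheus_a/b, is_atla_a/b,
    is_flow_judge_a/b); this helper collapses that logic for one model.
    """
    org = model_data.get(model_name, {}).get("organization")
    if org == "Prometheus":
        return prometheus_parse_model_response(response)
    if org == "Atla":
        return atla_parse_model_response(response)
    if org == "Flow AI":
        return flow_judge_parse_model_response(response)
    return parse_model_response(response)  # assumed default for other judges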