Spaces:
Running
Running
New models appear more often
Browse files
app.py
CHANGED
@@ -4,18 +4,17 @@ import random
|
|
4 |
from collections import defaultdict
|
5 |
from datetime import datetime
|
6 |
import hashlib
|
|
|
7 |
|
8 |
from dotenv import load_dotenv
|
9 |
-
|
10 |
load_dotenv()
|
11 |
|
12 |
-
import gradio as gr
|
13 |
from gen_api_answer import (
|
14 |
get_model_response,
|
15 |
parse_model_response,
|
16 |
prometheus_parse_model_response,
|
17 |
atla_parse_model_response,
|
18 |
-
flow_judge_parse_model_response
|
19 |
)
|
20 |
|
21 |
from random_sample_generation import (
|
@@ -24,7 +23,9 @@ from random_sample_generation import (
|
|
24 |
generate_ai_response
|
25 |
)
|
26 |
from db import add_vote, create_db_connection, get_votes
|
|
|
27 |
from utils import Vote
|
|
|
28 |
from common import (
|
29 |
POLICY_CONTENT,
|
30 |
ACKNOWLEDGEMENTS,
|
@@ -717,18 +718,21 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
717 |
else:
|
718 |
model_a, model_b = other_model, atla_model
|
719 |
else:
|
720 |
-
# For subsequent games, new
|
721 |
-
if random.random() < 0.
|
722 |
-
|
|
|
|
|
723 |
other_model = random.choice(other_models)
|
724 |
|
725 |
if random.random() < 0.5:
|
726 |
-
model_a, model_b =
|
727 |
else:
|
728 |
-
model_a, model_b = other_model,
|
729 |
else:
|
730 |
-
|
731 |
-
|
|
|
732 |
model_a, model_b = (model1, model2) if random.random() < 0.5 else (model2, model1)
|
733 |
|
734 |
# Get responses from models
|
@@ -750,9 +754,8 @@ with gr.Blocks(theme="default", css=CSS_STYLES) as demo:
|
|
750 |
is_prometheus_b = (model_data.get(model_b)['organization'] == 'Prometheus')
|
751 |
is_atla_a = (model_data.get(model_a)['organization'] == 'Atla')
|
752 |
is_atla_b = (model_data.get(model_b)['organization'] == 'Atla')
|
753 |
-
|
754 |
is_flow_judge_a = (model_data.get(model_a)['organization'] == 'Flow AI')
|
755 |
-
is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
|
756 |
|
757 |
if is_prometheus_a:
|
758 |
score_a_val, critique_a_val = prometheus_parse_model_response(response_a)
|
|
|
4 |
from collections import defaultdict
|
5 |
from datetime import datetime
|
6 |
import hashlib
|
7 |
+
import gradio as gr
|
8 |
|
9 |
from dotenv import load_dotenv
|
|
|
10 |
load_dotenv()
|
11 |
|
|
|
12 |
from gen_api_answer import (
|
13 |
get_model_response,
|
14 |
parse_model_response,
|
15 |
prometheus_parse_model_response,
|
16 |
atla_parse_model_response,
|
17 |
+
flow_judge_parse_model_response
|
18 |
)
|
19 |
|
20 |
from random_sample_generation import (
|
|
|
23 |
generate_ai_response
|
24 |
)
|
25 |
from db import add_vote, create_db_connection, get_votes
|
26 |
+
|
27 |
from utils import Vote
|
28 |
+
|
29 |
from common import (
|
30 |
POLICY_CONTENT,
|
31 |
ACKNOWLEDGEMENTS,
|
|
|
718 |
else:
|
719 |
model_a, model_b = other_model, atla_model
|
720 |
else:
|
721 |
+
# For subsequent games, one of the new models appears 40% of the time
|
722 |
+
if random.random() < 0.4:
|
723 |
+
# Randomly choose between new models
|
724 |
+
new_model = random.choice(["Atla-8B-preview", "Flow-Judge-1.0"])
|
725 |
+
other_models = [m for m in active_models if m not in [new_model, "Atla-8B-preview", "Flow-Judge-1.0"]]
|
726 |
other_model = random.choice(other_models)
|
727 |
|
728 |
if random.random() < 0.5:
|
729 |
+
model_a, model_b = new_model, other_model
|
730 |
else:
|
731 |
+
model_a, model_b = other_model, new_model
|
732 |
else:
|
733 |
+
# For other cases, exclude both Atla and Flow-Judge
|
734 |
+
non_special_models = [m for m in active_models if m not in ["Atla-8B-preview", "Flow-Judge-1.0"]]
|
735 |
+
model1, model2 = random.sample(non_special_models, 2)
|
736 |
model_a, model_b = (model1, model2) if random.random() < 0.5 else (model2, model1)
|
737 |
|
738 |
# Get responses from models
|
|
|
754 |
is_prometheus_b = (model_data.get(model_b)['organization'] == 'Prometheus')
|
755 |
is_atla_a = (model_data.get(model_a)['organization'] == 'Atla')
|
756 |
is_atla_b = (model_data.get(model_b)['organization'] == 'Atla')
|
|
|
757 |
is_flow_judge_a = (model_data.get(model_a)['organization'] == 'Flow AI')
|
758 |
+
is_flow_judge_b = (model_data.get(model_b)['organization'] == 'Flow AI')
|
759 |
|
760 |
if is_prometheus_a:
|
761 |
score_a_val, critique_a_val = prometheus_parse_model_response(response_a)
|