Spaces:
Running
Running
Commit
•
84b862d
1
Parent(s):
a758df3
add tie
Browse files
- app/db.py +1 -0
- app/init.py +1 -1
- app/ui_battle.py +14 -9
- app/vote.py +41 -9
- scripts/view_db.py +11 -4
app/db.py
CHANGED
@@ -53,6 +53,7 @@ def create_db():
|
|
53 |
prompt TEXT,
|
54 |
chosen_response TEXT,
|
55 |
rejected_response TEXT,
|
|
|
56 |
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
57 |
);
|
58 |
''')
|
|
|
53 |
prompt TEXT,
|
54 |
chosen_response TEXT,
|
55 |
rejected_response TEXT,
|
56 |
+
is_tie BOOLEAN DEFAULT FALSE,
|
57 |
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
58 |
);
|
59 |
''')
|
app/init.py
CHANGED
@@ -6,7 +6,7 @@ import os
|
|
6 |
|
7 |
scheduler = None
|
8 |
|
9 |
-
if
|
10 |
download_db()
|
11 |
# Sync local DB with remote repo every 5 minutes (only if a change is detected)
|
12 |
scheduler = CommitScheduler(
|
|
|
6 |
|
7 |
scheduler = None
|
8 |
|
9 |
+
if False:
|
10 |
download_db()
|
11 |
# Sync local DB with remote repo every 5 minutes (only if a change is detected)
|
12 |
scheduler = CommitScheduler(
|
app/ui_battle.py
CHANGED
@@ -5,11 +5,6 @@ from .messages import *
|
|
5 |
from .llm import *
|
6 |
import random
|
7 |
|
8 |
-
def disable():
|
9 |
-
return [gr.update(interactive=False), gr.update(interactive=False)]
|
10 |
-
def enable():
|
11 |
-
return [gr.update(interactive=True), gr.update(interactive=True)]
|
12 |
-
|
13 |
with gr.Blocks() as battle:
|
14 |
battle_useridstate = gr.State()
|
15 |
|
@@ -66,6 +61,8 @@ with gr.Blocks() as battle:
|
|
66 |
visible=False
|
67 |
)
|
68 |
|
|
|
|
|
69 |
def generate_responses(prompt):
|
70 |
if len(prompt.strip()) < MIN_PROMPT_LENGTH:
|
71 |
raise gr.Error(f"Prompt must be at least {MIN_PROMPT_LENGTH} characters")
|
@@ -78,11 +75,12 @@ with gr.Blocks() as battle:
|
|
78 |
return [
|
79 |
resp_a, # response1
|
80 |
resp_b, # response2
|
81 |
-
model_a, # model1_name
|
82 |
-
model_b, # model2_name
|
83 |
gr.update(visible=True), # response_row
|
84 |
gr.update(interactive=True, visible=True), # a_better
|
85 |
gr.update(interactive=True, visible=True), # b_better
|
|
|
86 |
gr.update(visible=False), # model1_name visibility
|
87 |
gr.update(visible=False) # model2_name visibility
|
88 |
]
|
@@ -99,6 +97,7 @@ with gr.Blocks() as battle:
|
|
99 |
response_row,
|
100 |
a_better,
|
101 |
b_better,
|
|
|
102 |
model1_name,
|
103 |
model2_name
|
104 |
]
|
@@ -107,13 +106,19 @@ with gr.Blocks() as battle:
|
|
107 |
a_better.click(
|
108 |
fn=a_is_better,
|
109 |
inputs=[model1_name, model2_name, battle_useridstate, prompt, response1, response2],
|
110 |
-
outputs=[a_better, b_better, model1_name, model2_name]
|
111 |
)
|
112 |
|
113 |
b_better.click(
|
114 |
fn=b_is_better,
|
115 |
inputs=[model1_name, model2_name, battle_useridstate, prompt, response1, response2],
|
116 |
-
outputs=[a_better, b_better, model1_name, model2_name]
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
)
|
118 |
|
119 |
def get_random_prompt():
|
|
|
5 |
from .llm import *
|
6 |
import random
|
7 |
|
|
|
|
|
|
|
|
|
|
|
8 |
with gr.Blocks() as battle:
|
9 |
battle_useridstate = gr.State()
|
10 |
|
|
|
61 |
visible=False
|
62 |
)
|
63 |
|
64 |
+
tie_button = gr.Button("Tie", variant='secondary', visible=False)
|
65 |
+
|
66 |
def generate_responses(prompt):
|
67 |
if len(prompt.strip()) < MIN_PROMPT_LENGTH:
|
68 |
raise gr.Error(f"Prompt must be at least {MIN_PROMPT_LENGTH} characters")
|
|
|
75 |
return [
|
76 |
resp_a, # response1
|
77 |
resp_b, # response2
|
78 |
+
model_a, # model1_name
|
79 |
+
model_b, # model2_name
|
80 |
gr.update(visible=True), # response_row
|
81 |
gr.update(interactive=True, visible=True), # a_better
|
82 |
gr.update(interactive=True, visible=True), # b_better
|
83 |
+
gr.update(interactive=True, visible=True), # tie_button
|
84 |
gr.update(visible=False), # model1_name visibility
|
85 |
gr.update(visible=False) # model2_name visibility
|
86 |
]
|
|
|
97 |
response_row,
|
98 |
a_better,
|
99 |
b_better,
|
100 |
+
tie_button,
|
101 |
model1_name,
|
102 |
model2_name
|
103 |
]
|
|
|
106 |
a_better.click(
|
107 |
fn=a_is_better,
|
108 |
inputs=[model1_name, model2_name, battle_useridstate, prompt, response1, response2],
|
109 |
+
outputs=[a_better, b_better, tie_button, model1_name, model2_name]
|
110 |
)
|
111 |
|
112 |
b_better.click(
|
113 |
fn=b_is_better,
|
114 |
inputs=[model1_name, model2_name, battle_useridstate, prompt, response1, response2],
|
115 |
+
outputs=[a_better, b_better, tie_button, model1_name, model2_name]
|
116 |
+
)
|
117 |
+
|
118 |
+
tie_button.click(
|
119 |
+
fn=tie_vote,
|
120 |
+
inputs=[model1_name, model2_name, battle_useridstate, prompt, response1, response2],
|
121 |
+
outputs=[a_better, b_better, tie_button, model1_name, model2_name]
|
122 |
)
|
123 |
|
124 |
def get_random_prompt():
|
app/vote.py
CHANGED
@@ -56,9 +56,9 @@ def a_is_better(model1, model2, userid, prompt="", response1="", response2=""):
|
|
56 |
print("Establishing database connection for voting.")
|
57 |
conn = get_db()
|
58 |
cursor = conn.cursor()
|
59 |
-
print(f"Inserting votelog: username={userid}, chosen={model1}, rejected={model2},
|
60 |
-
cursor.execute('INSERT INTO votelog (username, chosen, rejected, prompt, chosen_response, rejected_response) VALUES (?, ?, ?, ?, ?, ?)',
|
61 |
-
(str(userid), model1, model2, prompt, response1, response2))
|
62 |
if scheduler:
|
63 |
print("Scheduler detected. Acquiring scheduler lock before committing.")
|
64 |
with scheduler.lock:
|
@@ -88,9 +88,9 @@ def b_is_better(model1, model2, userid, prompt="", response1="", response2=""):
|
|
88 |
print("Establishing database connection for voting.")
|
89 |
conn = get_db()
|
90 |
cursor = conn.cursor()
|
91 |
-
print(f"Inserting votelog: username={userid}, chosen={model2}, rejected={model1},
|
92 |
-
cursor.execute('INSERT INTO votelog (username, chosen, rejected, prompt, chosen_response, rejected_response) VALUES (?, ?, ?, ?, ?, ?)',
|
93 |
-
(str(userid), model2, model1, prompt, response2, response1))
|
94 |
if scheduler:
|
95 |
print("Scheduler detected. Acquiring scheduler lock before committing.")
|
96 |
with scheduler.lock:
|
@@ -108,16 +108,48 @@ def b_is_better(model1, model2, userid, prompt="", response1="", response2=""):
|
|
108 |
print("Reloading UI after voting.")
|
109 |
return reload(model1, model2, userid, chose_b=True)
|
110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
# Reload
|
112 |
|
113 |
-
def reload(chosenmodel1=None, chosenmodel2=None, userid=None, chose_a=False, chose_b=False):
|
114 |
out = [
|
115 |
gr.update(interactive=False), # a_better
|
116 |
gr.update(interactive=False), # b_better
|
117 |
-
gr.update(
|
|
|
118 |
interactive=False,
|
119 |
visible=True), # model1_name
|
120 |
-
gr.update(value=f"Selected: {chosenmodel2}" if chose_b else chosenmodel2,
|
121 |
interactive=False,
|
122 |
visible=True) # model2_name
|
123 |
]
|
|
|
56 |
print("Establishing database connection for voting.")
|
57 |
conn = get_db()
|
58 |
cursor = conn.cursor()
|
59 |
+
print(f"Inserting votelog: username={userid}, chosen={model1}, rejected={model2}, is_tie=False")
|
60 |
+
cursor.execute('INSERT INTO votelog (username, chosen, rejected, prompt, chosen_response, rejected_response, is_tie) VALUES (?, ?, ?, ?, ?, ?, ?)',
|
61 |
+
(str(userid), model1, model2, prompt, response1, response2, False))
|
62 |
if scheduler:
|
63 |
print("Scheduler detected. Acquiring scheduler lock before committing.")
|
64 |
with scheduler.lock:
|
|
|
88 |
print("Establishing database connection for voting.")
|
89 |
conn = get_db()
|
90 |
cursor = conn.cursor()
|
91 |
+
print(f"Inserting votelog: username={userid}, chosen={model2}, rejected={model1}, is_tie=False")
|
92 |
+
cursor.execute('INSERT INTO votelog (username, chosen, rejected, prompt, chosen_response, rejected_response, is_tie) VALUES (?, ?, ?, ?, ?, ?, ?)',
|
93 |
+
(str(userid), model2, model1, prompt, response2, response1, False))
|
94 |
if scheduler:
|
95 |
print("Scheduler detected. Acquiring scheduler lock before committing.")
|
96 |
with scheduler.lock:
|
|
|
108 |
print("Reloading UI after voting.")
|
109 |
return reload(model1, model2, userid, chose_b=True)
|
110 |
|
111 |
+
def tie_vote(model1, model2, userid, prompt="", response1="", response2=""):
    """Record a tie between two models and return the post-vote UI updates.

    A row is inserted into ``votelog`` with ``is_tie`` set to True; because
    the table has no dedicated tie columns, ``model1`` is stored in the
    ``chosen``/``chosen_response`` columns and ``model2`` in the
    ``rejected``/``rejected_response`` columns. Both models are then
    upvoted.

    Args:
        model1: Name of the first model; must be a key or a value of
            ``AVAILABLE_MODELS``.
        model2: Name of the second model; must be a key or a value of
            ``AVAILABLE_MODELS``.
        userid: Identifier passed through ``mkuuid`` before being logged.
        prompt: Prompt text both models answered.
        response1: Response produced by ``model1``.
        response2: Response produced by ``model2``.

    Returns:
        Whatever ``reload(model1, model2, userid, is_tie=True)`` returns.

    Raises:
        gr.Error: If either model is not a known model.
    """
    print("Processing vote: Tie.")
    print(f"Comparing models: {model1} vs {model2}")

    # A tie logs and upvotes *both* models, so both names have to be known,
    # not just the first one.
    for model in (model1, model2):
        if model not in AVAILABLE_MODELS.keys() and model not in AVAILABLE_MODELS.values():
            print(f"Model '{model}' is not available. Raising error.")
            raise gr.Error('Sorry, please try voting again.')

    userid = mkuuid(userid)
    print(f"Generated UUID for user: {userid}")

    # Log the tie vote
    print("Establishing database connection for tie vote.")
    conn = get_db()
    cursor = conn.cursor()
    try:
        print(f"Inserting votelog: username={userid}, chosen={model1}, rejected={model2}, is_tie=True")
        cursor.execute(
            'INSERT INTO votelog (username, chosen, rejected, prompt, chosen_response, rejected_response, is_tie) VALUES (?, ?, ?, ?, ?, ?, ?)',
            (str(userid), model1, model2, prompt, response1, response2, True),
        )
        if scheduler:
            # NOTE(review): presumably the lock keeps this commit from
            # interleaving with the scheduler's sync of the DB file - confirm.
            with scheduler.lock:
                conn.commit()
        else:
            conn.commit()
    finally:
        # Release the cursor even when the insert or commit fails.
        cursor.close()

    # Upvote both models
    print(f"Upvoting both models in tie: {model1} and {model2}")
    upvote_model(model1, str(userid), prompt, response1)
    upvote_model(model2, str(userid), prompt, response2)

    print("Reloading UI after voting.")
    return reload(model1, model2, userid, is_tie=True)
|
141 |
+
|
142 |
# Reload
|
143 |
|
144 |
+
def reload(chosenmodel1=None, chosenmodel2=None, userid=None, chose_a=False, chose_b=False, is_tie=False):
|
145 |
out = [
|
146 |
gr.update(interactive=False), # a_better
|
147 |
gr.update(interactive=False), # b_better
|
148 |
+
gr.update(interactive=False), # tie_button
|
149 |
+
gr.update(value=f"Selected: {chosenmodel1}" if (chose_a or is_tie) else chosenmodel1,
|
150 |
interactive=False,
|
151 |
visible=True), # model1_name
|
152 |
+
gr.update(value=f"Selected: {chosenmodel2}" if (chose_b or is_tie) else chosenmodel2,
|
153 |
interactive=False,
|
154 |
visible=True) # model2_name
|
155 |
]
|
scripts/view_db.py
CHANGED
@@ -43,11 +43,18 @@ def view_db_content():
|
|
43 |
battles_df = pd.read_sql_query("""
|
44 |
SELECT
|
45 |
username,
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
substr(prompt, 1, 50) || '...' as prompt_preview,
|
49 |
-
substr(chosen_response, 1, 50) || '...' as
|
50 |
-
substr(rejected_response, 1, 50) || '...' as
|
|
|
51 |
datetime(timestamp, 'localtime') as local_time
|
52 |
FROM votelog
|
53 |
ORDER BY timestamp DESC
|
|
|
43 |
battles_df = pd.read_sql_query("""
|
44 |
SELECT
|
45 |
username,
|
46 |
+
CASE
|
47 |
+
WHEN is_tie THEN 'TIE'
|
48 |
+
ELSE chosen
|
49 |
+
END as winner,
|
50 |
+
CASE
|
51 |
+
WHEN is_tie THEN 'TIE'
|
52 |
+
ELSE rejected
|
53 |
+
END as loser,
|
54 |
substr(prompt, 1, 50) || '...' as prompt_preview,
|
55 |
+
substr(chosen_response, 1, 50) || '...' as response_a,
|
56 |
+
substr(rejected_response, 1, 50) || '...' as response_b,
|
57 |
+
is_tie,
|
58 |
datetime(timestamp, 'localtime') as local_time
|
59 |
FROM votelog
|
60 |
ORDER BY timestamp DESC
|