Spaces:
Sleeping
Sleeping
app
Browse files
app.py
CHANGED
@@ -30,18 +30,15 @@ question_types = sorted(set(ex["question_type"] for ex in fact_dataset))
|
|
30 |
|
31 |
def get_stats():
|
32 |
total_examples = Counter(ex["question_type"] for ex in fact_dataset)
|
33 |
-
curated_examples = Counter(row["question_type"] for row in examples
|
34 |
-
|
35 |
qt: {"total": total_examples[qt], "curated": curated_examples[qt]}
|
36 |
for qt in question_types
|
37 |
}
|
38 |
-
return stats
|
39 |
|
40 |
|
41 |
def get_example(selected_type=None):
|
42 |
evaluated_ids = set(row["example_id"] for row in examples())
|
43 |
-
print(f"completed: {evaluated_ids}")
|
44 |
-
|
45 |
available_examples = [
|
46 |
ex for ex in fact_dataset if ex["example_id"] not in evaluated_ids
|
47 |
]
|
@@ -49,12 +46,9 @@ def get_example(selected_type=None):
|
|
49 |
available_examples = [
|
50 |
ex for ex in available_examples if ex["question_type"] == selected_type
|
51 |
]
|
52 |
-
|
53 |
if not available_examples:
|
54 |
return None
|
55 |
-
|
56 |
example = random.choice(available_examples)
|
57 |
-
|
58 |
keep_keys = [
|
59 |
"example_id",
|
60 |
"question_type",
|
@@ -70,28 +64,40 @@ def get_example(selected_type=None):
|
|
70 |
"accuracy_oracle",
|
71 |
"accuracy_status",
|
72 |
]
|
73 |
-
|
74 |
return {k: example[k] for k in keep_keys if k in example}
|
75 |
|
76 |
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
style = Style("""
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
app
|
|
|
95 |
|
96 |
|
97 |
def render_stats(stats):
|
@@ -122,24 +128,8 @@ def render_example(example):
|
|
122 |
)
|
123 |
|
124 |
|
125 |
-
def upload_to_hf():
|
126 |
-
create_repo(
|
127 |
-
repo_id="rbiswasfc/iclr-eval-examples",
|
128 |
-
token=os.environ.get("HF_TOKEN"),
|
129 |
-
private=True,
|
130 |
-
repo_type="dataset",
|
131 |
-
exist_ok=True,
|
132 |
-
)
|
133 |
-
|
134 |
-
examples = db.t.examples
|
135 |
-
annotations = examples()
|
136 |
-
|
137 |
-
hf_ds = Dataset.from_list(annotations)
|
138 |
-
hf_ds.push_to_hub("rbiswasfc/iclr-eval-examples", token=os.environ.get("HF_TOKEN"))
|
139 |
-
|
140 |
-
|
141 |
@rt("/")
|
142 |
-
|
143 |
stats = get_stats()
|
144 |
example = get_example(question_type)
|
145 |
|
@@ -152,12 +142,10 @@ async def get(question_type: str = None):
|
|
152 |
hx_push_url="true",
|
153 |
)
|
154 |
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
else:
|
160 |
-
content = Div(
|
161 |
H2("Example"),
|
162 |
Div(
|
163 |
render_example(example),
|
@@ -167,22 +155,23 @@ async def get(question_type: str = None):
|
|
167 |
name="decision",
|
168 |
value="good",
|
169 |
hx_post="/evaluate",
|
170 |
-
hx_target="#example-
|
171 |
),
|
172 |
Button(
|
173 |
"Bad Example",
|
174 |
name="decision",
|
175 |
value="bad",
|
176 |
hx_post="/evaluate",
|
177 |
-
hx_target="#example-
|
178 |
),
|
179 |
-
# Hidden(name="example", value=json.dumps(example)),
|
180 |
Hidden(name="example_id", value=str(example["example_id"])),
|
|
|
181 |
id="evaluation-form",
|
182 |
),
|
183 |
id="example-container",
|
184 |
),
|
185 |
)
|
|
|
186 |
|
187 |
view_stats_link = A("Curation Stats", href="/stats", cls="view-stats-link")
|
188 |
|
@@ -196,14 +185,13 @@ async def get(question_type: str = None):
|
|
196 |
|
197 |
|
198 |
@rt("/stats")
|
199 |
-
|
200 |
stats = get_stats()
|
201 |
-
|
202 |
-
|
203 |
return Titled(
|
204 |
"Curation Statistics",
|
205 |
Div(
|
206 |
-
|
207 |
A("Back to Curation", href="/", cls="back-link"),
|
208 |
cls="container",
|
209 |
),
|
@@ -211,28 +199,52 @@ async def get():
|
|
211 |
|
212 |
|
213 |
@rt("/evaluate")
|
214 |
-
|
215 |
example_id = int(example_id)
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
upload_to_hf()
|
230 |
-
new_example = get_example(example_dict["question_type"])
|
231 |
|
|
|
232 |
if new_example is None:
|
233 |
-
return Div(
|
|
|
|
|
|
|
234 |
else:
|
235 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
|
237 |
|
238 |
# serve()
|
@@ -241,5 +253,4 @@ if __name__ == "__main__":
|
|
241 |
|
242 |
import uvicorn
|
243 |
|
244 |
-
# setup_hf_backup(app)
|
245 |
uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
|
|
|
30 |
|
31 |
def get_stats():
    """Summarise curation progress for every known question type.

    Returns:
        A dict keyed by question type. Each value is a dict with
        ``"total"`` (examples of that type in ``fact_dataset``) and
        ``"curated"`` (rows of that type currently returned by ``examples()``).
    """
    totals_by_type = Counter(item["question_type"] for item in fact_dataset)
    curated_by_type = Counter(record["question_type"] for record in examples())

    summary = {}
    for qtype in question_types:
        # Counter returns 0 for a type with no rows, so no key check is needed.
        summary[qtype] = {
            "total": totals_by_type[qtype],
            "curated": curated_by_type[qtype],
        }
    return summary
|
|
|
38 |
|
39 |
|
40 |
def get_example(selected_type=None):
|
41 |
evaluated_ids = set(row["example_id"] for row in examples())
|
|
|
|
|
42 |
available_examples = [
|
43 |
ex for ex in fact_dataset if ex["example_id"] not in evaluated_ids
|
44 |
]
|
|
|
46 |
available_examples = [
|
47 |
ex for ex in available_examples if ex["question_type"] == selected_type
|
48 |
]
|
|
|
49 |
if not available_examples:
|
50 |
return None
|
|
|
51 |
example = random.choice(available_examples)
|
|
|
52 |
keep_keys = [
|
53 |
"example_id",
|
54 |
"question_type",
|
|
|
64 |
"accuracy_oracle",
|
65 |
"accuracy_status",
|
66 |
]
|
|
|
67 |
return {k: example[k] for k in keep_keys if k in example}
|
68 |
|
69 |
|
70 |
+
def upload_to_hf():
    """Push every stored annotation to the Hugging Face Hub dataset repo.

    The repo is requested as a private dataset repo before the push, and
    the rows currently returned by ``examples()`` are uploaded as one
    dataset. The access token is read from the ``HF_TOKEN`` environment
    variable (``None`` when it is not set).
    """
    hub_repo = "rbiswasfc/iclr-eval-examples"
    hub_token = os.environ.get("HF_TOKEN")

    # exist_ok=True: an already-existing repo is not treated as an error.
    create_repo(
        hub_repo,
        token=hub_token,
        private=True,
        repo_type="dataset",
        exist_ok=True,
    )

    Dataset.from_list(examples()).push_to_hub(hub_repo, token=hub_token)
|
81 |
+
|
82 |
+
|
83 |
# Stylesheet for the whole app: dark background, light text, and styling for
# the example table, the evaluation buttons, the select box and links.
style = Style("""
body { background-color: #1e1e1e; color: #d4d4d4; font-family: Arial, sans-serif; }
h1, h2, h3 { color: #61dafb; }
.example-container { margin-top: 20px; }
.example-table { border-collapse: collapse; width: 100%; }
.example-table th, .example-table td { border: 1px solid #3a3a3a; padding: 8px; text-align: left; }
.example-table th { background-color: #2a2a2a; color: #61dafb; }
.example-table td { color: #d4d4d4; }
#evaluation-form { margin-top: 20px; }
#evaluation-form button { margin-right: 10px; background-color: #0e639c; color: white; border: none; padding: 10px 20px; cursor: pointer; }
#evaluation-form button:hover { background-color: #1177bb; }
select { background-color: #2a2a2a; color: #d4d4d4; border: 1px solid #3a3a3a; padding: 5px; }
a { color: #61dafb; text-decoration: none; }
a:hover { text-decoration: underline; }
""")

# The application object receives the stylesheet through its headers; `rt`
# is the shorthand used by the route decorators in this file.
app = FastHTML(hdrs=(style,))
rt = app.route
|
101 |
|
102 |
|
103 |
def render_stats(stats):
|
|
|
128 |
)
|
129 |
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
@rt("/")
|
132 |
+
def get(question_type: str = None):
|
133 |
stats = get_stats()
|
134 |
example = get_example(question_type)
|
135 |
|
|
|
142 |
hx_push_url="true",
|
143 |
)
|
144 |
|
145 |
+
content = (
|
146 |
+
Div(H2("All examples of this type have been evaluated!"), render_stats(stats))
|
147 |
+
if example is None
|
148 |
+
else Div(
|
|
|
|
|
149 |
H2("Example"),
|
150 |
Div(
|
151 |
render_example(example),
|
|
|
155 |
name="decision",
|
156 |
value="good",
|
157 |
hx_post="/evaluate",
|
158 |
+
hx_target="#example-container",
|
159 |
),
|
160 |
Button(
|
161 |
"Bad Example",
|
162 |
name="decision",
|
163 |
value="bad",
|
164 |
hx_post="/evaluate",
|
165 |
+
hx_target="#example-container",
|
166 |
),
|
|
|
167 |
Hidden(name="example_id", value=str(example["example_id"])),
|
168 |
+
Hidden(name="question_type", value=example["question_type"]),
|
169 |
id="evaluation-form",
|
170 |
),
|
171 |
id="example-container",
|
172 |
),
|
173 |
)
|
174 |
+
)
|
175 |
|
176 |
view_stats_link = A("Curation Stats", href="/stats", cls="view-stats-link")
|
177 |
|
|
|
185 |
|
186 |
|
187 |
@rt("/stats")
|
188 |
+
def get():
|
189 |
stats = get_stats()
|
190 |
+
stats_table = render_stats(stats)
|
|
|
191 |
return Titled(
|
192 |
"Curation Statistics",
|
193 |
Div(
|
194 |
+
stats_table,
|
195 |
A("Back to Curation", href="/", cls="back-link"),
|
196 |
cls="container",
|
197 |
),
|
|
|
199 |
|
200 |
|
201 |
def _decision_button(label, value):
    """Build one decision button that submits ``decision=value`` to /evaluate."""
    return Button(
        label,
        name="decision",
        value=value,
        hx_post="/evaluate",
        hx_target="#example-container",
    )


@rt("/evaluate")
def post(decision: str, example_id: str, question_type: str):
    """Record a good/bad decision for one example and return the next one.

    Args:
        decision: Value of the pressed button ("good" or "bad" in the forms
            rendered by this file).
        example_id: Id of the judged example, as sent by the hidden form field.
        question_type: Question type sent by the hidden form field. It is
            stored with the decision and used to pick the next example.

    Returns:
        A ``Div`` with the next unevaluated example of the same type and a
        fresh evaluation form, or a completion message with the current
        stats table when no example of that type is left.

    Raises:
        ValueError: If ``example_id`` is not an integer string.
    """
    example_id = int(example_id)
    example = next((ex for ex in fact_dataset if ex["example_id"] == example_id), None)

    if example is not None:
        stored_rows = examples()
        # A double click or a replayed request must not store the same example
        # twice: get_stats() counts rows, so duplicates would inflate "curated".
        if all(row["example_id"] != example_id for row in stored_rows):
            examples.insert(
                {
                    "id": len(stored_rows) + 1,
                    "example_id": example_id,
                    "question_type": question_type,
                    "question": example["question"],
                    "answer": example["answer"],
                    "decision": decision,
                }
            )
            # Only sync to the Hub when a row was actually added.
            upload_to_hf()

    new_example = get_example(question_type)
    if new_example is None:
        return Div(
            H2("All examples of this type have been evaluated!"),
            render_stats(get_stats()),
        )

    # NOTE(review): the buttons target #example-container and this fragment
    # carries the same id; confirm the swap mode does not nest containers.
    return Div(
        render_example(new_example),
        Form(
            _decision_button("Good Example", "good"),
            _decision_button("Bad Example", "bad"),
            Hidden(name="example_id", value=str(new_example["example_id"])),
            Hidden(name="question_type", value=new_example["question_type"]),
            id="evaluation-form",
        ),
        id="example-container",
    )
|
248 |
|
249 |
|
250 |
# serve()
|
|
|
253 |
|
254 |
import uvicorn
|
255 |
|
|
|
256 |
uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
|