陈俊杰
committed on
Commit
·
932ed5c
1
Parent(s):
5447046
time
Browse files
app.py
CHANGED
@@ -268,7 +268,7 @@ This leaderboard is used to show the performance of the <strong>automatic evalua
|
|
268 |
beijing_time = datetime.now(beijing_tz)
|
269 |
|
270 |
# 在页面上动态显示当前北京时间
|
271 |
-
time_placeholder.write("
|
272 |
|
273 |
# 设置更新频率为每秒钟一次
|
274 |
time.sleep(1)
|
@@ -286,5 +286,10 @@ Please feel free to contact us! 😉
|
|
286 |
</p>""",unsafe_allow_html=True)
|
287 |
elif page == "References":
|
288 |
st.header("References")
|
289 |
-
st.markdown("""
|
|
|
|
|
|
|
|
|
|
|
290 |
|
|
|
268 |
beijing_time = datetime.now(beijing_tz)
|
269 |
|
270 |
# 在页面上动态显示当前北京时间
|
271 |
+
time_placeholder.write("Current Beijing Time: " + beijing_time.strftime('%Y-%m-%d %H:%M:%S'))
|
272 |
|
273 |
# 设置更新频率为每秒钟一次
|
274 |
time.sleep(1)
|
|
|
286 |
</p>""",unsafe_allow_html=True)
|
287 |
elif page == "References":
|
288 |
st.header("References")
|
289 |
+
st.markdown("""<p>[1] Mao R, Chen G, Zhang X, et al. GPTEval: A survey on assessments of ChatGPT and GPT-4. <a href="https://arxiv.org/pdf/2308.12488">pdf</a><br />
|
290 |
+
[2] Chang Y, Wang X, Wang J, et al. A survey on evaluation of large language models. <a href="https://dl.acm.org/doi/pdf/10.1145/3641289">pdf</a><br />
|
291 |
+
[3] Chan C M, Chen W, Su Y, et al. Chateval: Towards better llm-based evaluators through multi-agent debate. <a href="https://arxiv.org/pdf/2308.07201">pdf</a><br />
|
292 |
+
[4] Li R, Patel T, Du X. Prd: Peer rank and discussion improve large language model based evaluations. <a href="https://arxiv.org/pdf/2307.02762">pdf</a><br />
|
293 |
+
[5] Chu Z, Ai Q, Tu Y, et al. Pre: A peer review based large language model evaluator. <a href="https://arxiv.org/pdf/2401.15641">pdf</a></p>
|
294 |
+
""")
|
295 |
|