Spaces:
Running
Running
Commit
·
733bd44
1
Parent(s):
86d99c3
added basic framework
Browse files
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/texts.cpython-311.pyc +0 -0
- app.py +49 -6
- flagged/log.csv +2 -0
- texts.py +7 -0
__pycache__/app.cpython-311.pyc
ADDED
Binary file (926 Bytes). View file
|
|
__pycache__/texts.cpython-311.pyc
ADDED
Binary file (1.18 kB). View file
|
|
app.py
CHANGED
@@ -1,11 +1,54 @@
|
|
1 |
# gradio display leaderboard
|
2 |
|
3 |
-
import gradio as gr
|
4 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
|
10 |
-
|
11 |
-
iface.launch()
|
|
|
1 |
# gradio display leaderboard
|
2 |
|
|
|
3 |
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import matplotlib
|
6 |
+
# matplotlib.use('macosx')
|
7 |
+
import gradio as gr
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import plotly.graph_objects as go
|
10 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
11 |
+
from texts import INTRODUCTION_TEXT, TITLE
|
12 |
+
|
13 |
+
def _placeholder_frame():
    # Dummy leaderboard rows used until real evaluation results are wired in.
    return pd.DataFrame({
        'name': ['GPT-3', 'T5', 'BERT', 'RoBERTa', 'XLNet'],
        'score': [0.75, 0.72, 0.68, 0.65, 0.62],
        'rank': [1, 2, 3, 4, 5],
    })

# Leaderboard tables keyed by language code.
# NOTE(review): 'en' and 'zh' currently hold identical placeholder data.
df_lang = {lang: _placeholder_frame() for lang in ('en', 'zh')}
|
25 |
+
|
26 |
+
def create_lang_leader_board(df):
    """Render one language's leaderboard table as a Gradio Dataframe.

    Args:
        df: pandas DataFrame whose first column is a model name (text) and
            whose remaining columns are numeric (score, rank).
            NOTE(review): assumes the name column comes first — confirm
            against the shape of ``df_lang``.

    Returns:
        A ``gr.components.Dataframe`` component displaying ``df``'s rows.
    """
    headers = list(df.columns)  # materialize once; reused for headers & types
    # First column rendered as text, all remaining columns as numbers.
    types = ["str"] + ["number"] * (len(headers) - 1)

    return gr.components.Dataframe(
        value=df.values.tolist(),
        headers=headers,
        datatype=types,
        # max_rows=10,
    )
|
36 |
+
|
37 |
+
|
38 |
+
def launch_gradio():
    """Build the leaderboard UI (one tab per language) and serve it.

    Blocks on ``demo.launch()`` until the Gradio server stops.
    """
    demo = gr.Blocks()

    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        # One tab per language code, each showing that language's table.
        for lang_code, lang_df in df_lang.items():
            with gr.Tab(lang_code):
                create_lang_leader_board(lang_df)

    demo.launch()
|
49 |
|
50 |
+
# Re-run launch_gradio every hour in a background thread, then start the
# app in the foreground.
# NOTE(review): launch_gradio() blocks on demo.launch(), so the hourly job
# would try to start a second server on an already-bound port — confirm the
# intent is a periodic data refresh (the usual HF-Space pattern) rather
# than a literal re-launch.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, 'interval', hours=1)
scheduler.start()

launch_gradio()
|
|
flagged/log.csv
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
output,flag,username,timestamp
|
2 |
+
"{""headers"":[""Model"","" Zeroshot"","" Fewshot(3-shot)"","" Best Score""],""data"":[[""ChatGPT"",80,80,80],[""GPT-3"",23,23,23]],""metadata"":null}",,,2024-03-12 19:34:52.615692
|
texts.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Page heading; injected as raw HTML via gr.HTML() in app.py.
TITLE = '<h1 align="center" id="space-title">The OpsEval Leaderboard</h1>'

# Introductory blurb rendered under the title via gr.Markdown() in app.py.
INTRODUCTION_TEXT = '''
The OpsEval dataset represents a pioneering effort in the evaluation of Artificial Intelligence for IT Operations (AIOps), focusing on the application of Large Language Models (LLMs) within this domain. In an era where IT operations are increasingly reliant on AI technologies for automation and efficiency, understanding the performance of LLMs in operational tasks becomes crucial. OpsEval offers a comprehensive task-oriented benchmark specifically designed for assessing LLMs in various crucial IT Ops scenarios.

This dataset is motivated by the emerging trend of utilizing AI in automated IT operations, as predicted by Gartner, and the remarkable capabilities exhibited by LLMs in NLP-related tasks. OpsEval aims to bridge the gap in evaluating these models' performance in AIOps tasks, including root cause analysis of failures, generation of operations and maintenance scripts, and summarizing alert information.
'''
|