Spaces:
Runtime error
Runtime error
test
#13
by
Tae
- opened
- app.py +2 -6
- requirements.txt +3 -4
app.py
CHANGED
@@ -21,7 +21,7 @@ FORMATTED_TASK_NAMES = sorted([" ".join(t.capitalize() for t in task.split("_"))
|
|
21 |
|
22 |
def download_submissions():
|
23 |
filt = DatasetFilter(benchmark="raft")
|
24 |
-
all_submissions = list_datasets(filter=filt,
|
25 |
submissions = []
|
26 |
|
27 |
for dataset in all_submissions:
|
@@ -83,8 +83,6 @@ st.set_page_config(layout="wide")
|
|
83 |
st.title("RAFT: Real-world Annotated Few-shot Tasks")
|
84 |
st.markdown(
|
85 |
"""
|
86 |
-
⚠️ **The RAFT benchmark is currently undergoing maintenance and is not accepting submissions at the moment. We apologise for the inconvenience.**
|
87 |
-
|
88 |
Large pre-trained language models have shown promise for few-shot learning, completing text-based tasks given only a few task-specific examples. Will models soon solve classification tasks that have so far been reserved for human research assistants?
|
89 |
|
90 |
[RAFT](https://raft.elicit.org) is a few-shot classification benchmark that tests language models:
|
@@ -99,9 +97,7 @@ To submit to RAFT, follow the instruction posted on [this page](https://huggingf
|
|
99 |
submissions = download_submissions()
|
100 |
print(f"INFO - downloaded {len(submissions)} submissions")
|
101 |
df = format_submissions(submissions)
|
102 |
-
styler =
|
103 |
-
**{"white-space": "pre-wrap", "text-align": "center"}
|
104 |
-
)
|
105 |
# hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
|
106 |
st.markdown(
|
107 |
"""
|
|
|
21 |
|
22 |
def download_submissions():
|
23 |
filt = DatasetFilter(benchmark="raft")
|
24 |
+
all_submissions = list_datasets(filter=filt, cardData=True, use_auth_token=auth_token)
|
25 |
submissions = []
|
26 |
|
27 |
for dataset in all_submissions:
|
|
|
83 |
st.title("RAFT: Real-world Annotated Few-shot Tasks")
|
84 |
st.markdown(
|
85 |
"""
|
|
|
|
|
86 |
Large pre-trained language models have shown promise for few-shot learning, completing text-based tasks given only a few task-specific examples. Will models soon solve classification tasks that have so far been reserved for human research assistants?
|
87 |
|
88 |
[RAFT](https://raft.elicit.org) is a few-shot classification benchmark that tests language models:
|
|
|
97 |
submissions = download_submissions()
|
98 |
print(f"INFO - downloaded {len(submissions)} submissions")
|
99 |
df = format_submissions(submissions)
|
100 |
+
styler = df.style.set_precision(3).set_properties(**{"white-space": "pre-wrap", "text-align": "center"})
|
|
|
|
|
101 |
# hack to remove index column: https://discuss.streamlit.io/t/questions-on-st-table/6878/3
|
102 |
st.markdown(
|
103 |
"""
|
requirements.txt
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
-
pandas
|
2 |
python-dotenv
|
3 |
protobuf~=3.19.0
|
4 |
-
huggingface-hub==0.
|
5 |
-
datasets==2.8.0
|
6 |
-
altair<5
|
|
|
1 |
+
pandas<=1.4
|
2 |
python-dotenv
|
3 |
protobuf~=3.19.0
|
4 |
+
huggingface-hub==0.9.1
|
5 |
+
datasets==2.8.0
|
|