ml-summit / src /scripts /nlp_datas.csv
facat's picture
init
2fc4496 unverified
raw
history blame
No virus
9.58 kB
dataset,year,size,task
acronym_identification,2020,8.556464,token-classification
aeslc,2019,11.643743,summarization
ag_news,2015,31.327765,text-classification
ajgt_twitter_ar,2017,0.107395,text-classification
alt,2016,47.849734,translation
amazon_polarity,2013,688.339454,text-classification
ambig_qa,2020,39.4018,question-answering
amttl,2018,0.685534,token-classification
app_reviews,2017,42.592679,text-classification
aqua_rat,2017,99.837029,question-answering
arsentd_lev,2018,0.392666,text-classification
arxiv_dataset,2019,0.0,translation
aslg_pc12,2012,12.773431,translation
assin,2016,2.249205,text-classification
assin2,2020,2.113646,text-classification
atomic,2019,19.083782,text2text-generation
autshumato,2010,32.124009,translation
bc2gm_corpus,2008,4.636753,token-classification
best2009,2009,13.89126,token-classification
billsum,2019,67.260676,summarization
biosses,2017,0.036324,text-classification
blimp,2019,29.576684,text-classification
blog_authorship_corpus,2006,632.898892,text-classification
bn_hate_speech,2020,0.974312,text-classification
break_data,2020,79.85539,text2text-generation
c3,2020,9.834177,question-answering
capes,2018,162.229298,translation
cdt,2019,0.375476,text-classification
cedr,2021,2.515548,text-classification
clickbait_news_bg,2017,8.569575,text-classification
climate_fever,2020,0.687133,text-classification
cmu_hinglish_dog,2018,8.749685,translation
cnn_dailymail,2015,1756.318416,summarization
coarse_discourse,2017,4.636201,text-classification
codah,2019,2.91078,question-answering
code_x_glue_cc_clone_detection_big_clone_bench,2014,47.955874,text-classification
code_x_glue_cc_clone_detection_poj104,2016,8.658581,text-retrieval
code_x_glue_cc_defect_detection,2019,61.685715,text-classification
code_x_glue_ct_code_to_text,2019,5191.751695,translation
code_x_glue_tc_nl_code_search_adv,2019,966.025624,text-retrieval
code_x_glue_tc_text_to_code,2018,100.769638,translation
competition_math,2021,20.327424,text2text-generation
conllpp,2019,4.8596,token-classification
cos_e,2019,10.830854,question-answering
covid_qa_castorini,2020,4.520993,question-answering
covid_qa_deepset,2020,4.418117,question-answering
covid_qa_ucsd,2020,0.0,question-answering
cryptonite,2021,46.98957,question-answering
cuad,2021,18.309308,question-answering
datacommons_factcheck,2019,1.343792,text-classification
dbpedia_14,2015,68.341743,text-classification
definite_pronoun_resolution,2012,0.227452,token-classification
dengue_filipino,2018,0.156014,text-classification
docred,2019,458.040413,text-retrieval
dream,2019,5.55819,question-answering
drop,2019,8.308692,question-answering
dyk,2013,0.685462,question-answering
emo,2019,3.362556,text-classification
ethos,2020,0.186755,text-classification
europa_eac_tm,2014,84.513984,translation
europa_ecdc_tm,2014,102.879264,translation
event2Mind,2018,1.30077,text2text-generation
exams,2020,4583.119779,question-answering
fake_news_filipino,2020,1.313458,text-classification
financial_phrasebank,2014,2.72756,text-classification
finer,2019,3.733127,token-classification
flores,2019,3.085562,translation
flue,2019,867.236435,text-classification
freebase_qa,2019,33.204999,question-answering
generated_reviews_enth,2020,59.490601,translation
germaner,2015,4.363657,token-classification
gigaword,2003,578.402958,summarization
glue,2019,1001.736261,text-classification
gooaq,2021,2111.358901,question-answering
google_wellformed_query,2018,1.157019,text-classification
grail_qa,2020,17.636773,question-answering
guardian_authorship,2017,49.611984,text-classification
gutenberg_time,2020,35.853781,text-classification
hard,2018,8.508677,text-classification
harem,2006,3.603154,token-classification
has_part,2020,7.437382,text-classification
hate_speech_filipino,2019,0.822927,text-classification
hatexplain,2020,12.848091,text-classification
hindi_discourse,2020,4.176677,text-classification
hlgd,2021,1.858948,text-classification
hotpot_qa,2018,1272.841016,question-answering
hover,2020,12.257835,text-retrieval
humicroedit,2019,3.242912,text-classification
hybrid_qa,2020,217.436855,question-answering
hyperpartisan_news_detection,2019,1004.195772,text-classification
igbo_english_machine_translation,2020,2.580255,translation
igbo_ner,2020,4.443355,token-classification
jnlpba,2004,3.171072,token-classification
journalists_questions,2016,0.271039,text-classification
kan_hope,2021,0.568972,text-classification
kinnews_kirnews,2020,65.127732,text-classification
kor_3i4k,2018,2.956114,text-classification
kor_nli,2020,126.339696,text-classification
kor_sae,2019,2.545926,text-classification
labr,2013,39.953712,text-classification
lama,2019,298.569546,text-retrieval
lc_quad,2019,3.959901,question-answering
lex_glue,2021,343.07123,question-answering
linnaeus,2010,18.204624,token-classification
lst20,2020,0.0,token-classification
mac_morpho,2015,2.463485,token-classification
masakhaner,2021,5.387138,token-classification
mbpp,2021,0.818796,text2text-generation
med_hop,2018,679.686122,question-answering
medical_dialog,2020,2082.878369,question-answering
medical_questions_pairs,2020,0.665688,text-classification
metooma,2020,0.408889,text-classification
metrec,2020,2.267882,text-classification
mlqa,2019,4150.871116,question-answering
mlsum,2020,6020.125939,summarization
mocha,2020,14.452311,question-answering
mrqa,2019,1479.518355,question-answering
msr_sqa,2017,4.796932,question-answering
msr_text_compression,2016,0.0,summarization
msr_zhen_translation_parity,2018,0.0,translation
multi_news,2019,756.785627,summarization
multi_re_qa,2020,75.245778,question-answering
multi_x_science_sum,2020,61.329304,summarization
multidoc2dial,2021,19.353432,question-answering
narrativeqa_manual,2018,22.638273,text2text-generation
ncbi_disease,2014,1.546492,token-classification
nchlt,2014,238.450416,token-classification
ncslgr,2007,4119.164501,translation
newsph_nli,2020,76.565287,text-classification
newspop,2018,30.338277,text-classification
newsqa,2017,0.0,question-answering
nkjp-ner,2012,0.821629,token-classification
norne,2020,246.710964,token-classification
norwegian_ner,2019,36.365354,token-classification
oclar,2019,0.382976,text-classification
offcombr,2017,0.185171,text-classification
offenseval_dravidian,2021,7.99568,text-classification
openai_humaneval,2021,0.044877,text2text-generation
openbookqa,2018,2.892196,question-answering
opinosis,2010,0.757398,summarization
opus100,2020,2610.517142,translation
opus_elhuyar,2012,44.468751,translation
orange_sum,2020,50.379977,summarization
parsinlu_reading_comprehension,2020,4.117863,question-answering
per_sent,2020,23.117196,text-classification
pn_summary,2020,89.591141,summarization
poem_sentiment,2020,0.04987,text-classification
pragmeval,2019,106.61448,text-classification
proto_qa,2020,7.493391,question-answering
pubmed_qa,2019,2063.6481,question-answering
qa_srl,2015,1.087729,question-answering
qanta,2019,170.754918,question-answering
qed,2020,14.083968,question-answering
reasoning_bg,2019,8.768975,question-answering
reddit_tifu,2018,1341.215712,summarization
riddle_sense,2021,2.083122,question-answering
ro_sent,2020,14.700057,text-classification
ro_sts,2021,1.267607,text-classification
ro_sts_parallel,2021,4.503388,translation
ronec,2019,14.675943,token-classification
ropes,2019,3.516917,question-answering
samsum,2019,2.9441,summarization
sberquad,2020,66.047276,question-answering
scan,2018,45.159884,text2text-generation
scb_mt_enth_2020,2020,276.831118,translation
scielo,2018,391.247854,translation
scientific_papers,2018,9009.292694,summarization
sciq,2017,2.821345,question-answering
sede,2021,6.318959,token-classification
selqa,2016,137.518059,question-answering
sem_eval_2020_task_11,2020,0.0,text-classification
sharc,2018,5.230207,question-answering
simple_questions_v2,2015,1270.30677,question-answering
sms_spam,2011,0.203415,text-classification
social_bias_frames,2020,9.464583,text2text-generation
species_800,2013,18.204624,token-classification
spider,2018,99.736136,text2text-generation
squad_it,2018,8.776531,question-answering
squad_kor_v1,2019,42.408533,question-answering
squad_kor_v2,2020,1373.763305,question-answering
stereoset,2020,25.00449,text-classification
stsb_mt_sv,2020,0.383047,text-classification
stsb_multi_mt,2021,12.992041,text-classification
super_glue,2019,58.368572,text-classification
swag,2018,84.49243,text-classification
swedish_medical_ner,2016,156.818136,token-classification
tep_en_fa_para,2011,16.353318,translation
text2log,2021,9.746473,translation
thai_toxicity_tweet,2019,0.19474,text-classification
thainer,2019,5.456461,token-classification
turkic_xwmt,2021,1157.61564,translation
tweet_eval,2020,18.982053,text-classification
tweet_qa,2019,1.57398,question-answering
tweets_ar_en_parallel,2020,8.812878,translation
tweets_hate_speech_detection,2018,4.738708,text-classification
universal_morphologies,2016,467.757708,token-classification
urdu_fake_news,2020,1.042653,text-classification
urdu_sentiment_corpus,2020,0.051583,text-classification
wiki_hop,2018,679.686122,question-answering
wiki_movies,2016,57.070041,question-answering
xcopa,2020,2.715704,question-answering
xed_en_fi,2020,9.68494,text-classification
xglue,2020,9634.964581,question-answering
xor_tydi_qa,2020,17.720586,question-answering
xquad_r,2020,196.497587,question-answering
xsum,2018,257.302866,summarization
yelp_review_full,2015,196.146755,text-classification
roman_urdu_hate_speech,2020,2.44736,text-classification
adv_glue,2021,0.243972,text-classification
gsm8k,2021,11.290661,text2text-generation
sst2,2014,7.439277,text-classification
gpt-3,2018,750000.0,text2text-generation