all-distilroberta-v1 / data_config.json
nreimers's picture
upload
ac8d0d1
raw
history blame
15.7 kB
[
{
"name": "stackexchange_title_body/skeptics.stackexchange.com.jsonl.gz",
"lines": 10009,
"weight": 1
},
{
"name": "stackexchange_title_body/writers.stackexchange.com.jsonl.gz",
"lines": 10157,
"weight": 1
},
{
"name": "stackexchange_title_body/astronomy.stackexchange.com.jsonl.gz",
"lines": 10462,
"weight": 1
},
{
"name": "stackexchange_title_body/vi.stackexchange.com.jsonl.gz",
"lines": 10551,
"weight": 1
},
{
"name": "stackexchange_title_body/cstheory.stackexchange.com.jsonl.gz",
"lines": 10642,
"weight": 1
},
{
"name": "stackexchange_title_body/engineering.stackexchange.com.jsonl.gz",
"lines": 10753,
"weight": 1
},
{
"name": "stackexchange_title_body/french.stackexchange.com.jsonl.gz",
"lines": 10794,
"weight": 1
},
{
"name": "stackexchange_title_body/economics.stackexchange.com.jsonl.gz",
"lines": 11115,
"weight": 1
},
{
"name": "stackexchange_title_body/anime.stackexchange.com.jsonl.gz",
"lines": 11444,
"weight": 1
},
{
"name": "stackexchange_title_body/islam.stackexchange.com.jsonl.gz",
"lines": 11853,
"weight": 1
},
{
"name": "stackexchange_title_body/expressionengine.stackexchange.com.jsonl.gz",
"lines": 11866,
"weight": 1
},
{
"name": "stackexchange_title_body/politics.stackexchange.com.jsonl.gz",
"lines": 11894,
"weight": 1
},
{
"name": "stackexchange_title_body/history.stackexchange.com.jsonl.gz",
"lines": 12021,
"weight": 1
},
{
"name": "stackexchange_title_body/christianity.stackexchange.com.jsonl.gz",
"lines": 12108,
"weight": 1
},
{
"name": "stackexchange_title_body/boardgames.stackexchange.com.jsonl.gz",
"lines": 12149,
"weight": 1
},
{
"name": "stackexchange_title_body/civicrm.stackexchange.com.jsonl.gz",
"lines": 12543,
"weight": 1
},
{
"name": "stackexchange_title_body/craftcms.stackexchange.com.jsonl.gz",
"lines": 12574,
"weight": 1
},
{
"name": "stackexchange_title_body/hinduism.stackexchange.com.jsonl.gz",
"lines": 13450,
"weight": 1
},
{
"name": "stackexchange_title_body/networkengineering.stackexchange.com.jsonl.gz",
"lines": 13454,
"weight": 1
},
{
"name": "stackexchange_title_body/german.stackexchange.com.jsonl.gz",
"lines": 13950,
"weight": 1
},
{
"name": "stackexchange_title_body/philosophy.stackexchange.com.jsonl.gz",
"lines": 14829,
"weight": 1
},
{
"name": "stackexchange_title_body/gardening.stackexchange.com.jsonl.gz",
"lines": 15136,
"weight": 1
},
{
"name": "stackexchange_title_body/space.stackexchange.com.jsonl.gz",
"lines": 15142,
"weight": 1
},
{
"name": "stackexchange_title_body/bicycles.stackexchange.com.jsonl.gz",
"lines": 16353,
"weight": 1
},
{
"name": "stackexchange_title_body/quant.stackexchange.com.jsonl.gz",
"lines": 17261,
"weight": 1
},
{
"name": "stackexchange_title_body/puzzling.stackexchange.com.jsonl.gz",
"lines": 17851,
"weight": 1
},
{
"name": "stackexchange_title_body/law.stackexchange.com.jsonl.gz",
"lines": 17941,
"weight": 1
},
{
"name": "stackexchange_title_body/arduino.stackexchange.com.jsonl.gz",
"lines": 19553,
"weight": 1
},
{
"name": "stackexchange_title_body/aviation.stackexchange.com.jsonl.gz",
"lines": 20139,
"weight": 1
},
{
"name": "stackexchange_title_body/softwarerecs.stackexchange.com.jsonl.gz",
"lines": 20142,
"weight": 1
},
{
"name": "stackexchange_title_body/movies.stackexchange.com.jsonl.gz",
"lines": 20181,
"weight": 1
},
{
"name": "stackexchange_title_body/music.stackexchange.com.jsonl.gz",
"lines": 20636,
"weight": 1
},
{
"name": "stackexchange_title_body/emacs.stackexchange.com.jsonl.gz",
"lines": 21055,
"weight": 1
},
{
"name": "stackexchange_title_body/dsp.stackexchange.com.jsonl.gz",
"lines": 21252,
"weight": 1
},
{
"name": "flickr30k_captions.jsonl.gz",
"lines": 317695,
"weight": 1
},
{
"name": "coco_captions.jsonl.gz",
"lines": 828395,
"weight": 1
},
{
"name": "codesearchnet.jsonl.gz",
"lines": 1151414,
"weight": 1
},
{
"name": "stackexchange_title_body/japanese.stackexchange.com.jsonl.gz",
"lines": 22056,
"weight": 2
},
{
"name": "stackexchange_title_body/mechanics.stackexchange.com.jsonl.gz",
"lines": 22868,
"weight": 2
},
{
"name": "stackexchange_title_body/crypto.stackexchange.com.jsonl.gz",
"lines": 23231,
"weight": 2
},
{
"name": "stackexchange_title_body/cooking.stackexchange.com.jsonl.gz",
"lines": 23705,
"weight": 2
},
{
"name": "stackexchange_title_body/photo.stackexchange.com.jsonl.gz",
"lines": 23753,
"weight": 2
},
{
"name": "stackexchange_title_body/workplace.stackexchange.com.jsonl.gz",
"lines": 24189,
"weight": 2
},
{
"name": "stackexchange_title_body/biology.stackexchange.com.jsonl.gz",
"lines": 24447,
"weight": 2
},
{
"name": "stackexchange_title_body/bitcoin.stackexchange.com.jsonl.gz",
"lines": 25374,
"weight": 2
},
{
"name": "stackexchange_title_body/worldbuilding.stackexchange.com.jsonl.gz",
"lines": 26763,
"weight": 2
},
{
"name": "stackexchange_title_body/datascience.stackexchange.com.jsonl.gz",
"lines": 27397,
"weight": 2
},
{
"name": "stackexchange_title_body/ux.stackexchange.com.jsonl.gz",
"lines": 29403,
"weight": 2
},
{
"name": "stackexchange_title_body/webapps.stackexchange.com.jsonl.gz",
"lines": 29697,
"weight": 2
},
{
"name": "stackexchange_title_body/graphicdesign.stackexchange.com.jsonl.gz",
"lines": 30233,
"weight": 2
},
{
"name": "stackexchange_title_body/raspberrypi.stackexchange.com.jsonl.gz",
"lines": 30625,
"weight": 2
},
{
"name": "stackexchange_title_body/money.stackexchange.com.jsonl.gz",
"lines": 32021,
"weight": 2
},
{
"name": "stackexchange_title_body/judaism.stackexchange.com.jsonl.gz",
"lines": 32028,
"weight": 2
},
{
"name": "stackexchange_title_body/ethereum.stackexchange.com.jsonl.gz",
"lines": 32760,
"weight": 2
},
{
"name": "stackexchange_title_body/academia.stackexchange.com.jsonl.gz",
"lines": 34331,
"weight": 2
},
{
"name": "stackexchange_title_body/chemistry.stackexchange.com.jsonl.gz",
"lines": 34506,
"weight": 2
},
{
"name": "stackexchange_title_body/webmasters.stackexchange.com.jsonl.gz",
"lines": 34559,
"weight": 2
},
{
"name": "stackexchange_title_body/meta.stackoverflow.com.jsonl.gz",
"lines": 36456,
"weight": 2
},
{
"name": "stackexchange_title_body/cs.stackexchange.com.jsonl.gz",
"lines": 38314,
"weight": 2
},
{
"name": "stackexchange_title_body/travel.stackexchange.com.jsonl.gz",
"lines": 41227,
"weight": 2
},
{
"name": "stackexchange_title_body/rpg.stackexchange.com.jsonl.gz",
"lines": 42303,
"weight": 2
},
{
"name": "stackexchange_title_body/codereview.stackexchange.com.jsonl.gz",
"lines": 45765,
"weight": 3
},
{
"name": "stackexchange_title_body/gamedev.stackexchange.com.jsonl.gz",
"lines": 46485,
"weight": 3
},
{
"name": "stackexchange_title_body/android.stackexchange.com.jsonl.gz",
"lines": 51608,
"weight": 3
},
{
"name": "stackexchange_title_body/softwareengineering.stackexchange.com.jsonl.gz",
"lines": 53942,
"weight": 3
},
{
"name": "stackexchange_title_body/security.stackexchange.com.jsonl.gz",
"lines": 58000,
"weight": 3
},
{
"name": "stackexchange_title_body/diy.stackexchange.com.jsonl.gz",
"lines": 60083,
"weight": 3
},
{
"name": "stackexchange_title_body/scifi.stackexchange.com.jsonl.gz",
"lines": 61528,
"weight": 3
},
{
"name": "stackexchange_title_body/mathematica.stackexchange.com.jsonl.gz",
"lines": 73131,
"weight": 4
},
{
"name": "TriviaQA_pairs.jsonl.gz",
"lines": 73346,
"weight": 4
},
{
"name": "stackexchange_title_body/drupal.stackexchange.com.jsonl.gz",
"lines": 79717,
"weight": 4
},
{
"name": "stackexchange_title_body/blender.stackexchange.com.jsonl.gz",
"lines": 80766,
"weight": 4
},
{
"name": "stackexchange_title_body/dba.stackexchange.com.jsonl.gz",
"lines": 81871,
"weight": 4
},
{
"name": "stackexchange_title_body/ell.stackexchange.com.jsonl.gz",
"lines": 83271,
"weight": 4
},
{
"name": "stackexchange_title_body/meta.stackexchange.com.jsonl.gz",
"lines": 83510,
"weight": 4
},
{
"name": "squad_pairs.jsonl.gz",
"lines": 87599,
"weight": 5
},
{
"name": "stackexchange_title_body/gaming.stackexchange.com.jsonl.gz",
"lines": 88912,
"weight": 5
},
{
"name": "stackexchange_title_body/sharepoint.stackexchange.com.jsonl.gz",
"lines": 94011,
"weight": 5
},
{
"name": "stackexchange_title_body/magento.stackexchange.com.jsonl.gz",
"lines": 99991,
"weight": 5
},
{
"name": "NQ-train_pairs.jsonl.gz",
"lines": 100231,
"weight": 5
},
{
"name": "stackexchange_title_body/wordpress.stackexchange.com.jsonl.gz",
"lines": 100474,
"weight": 5
},
{
"name": "SimpleWiki.jsonl.gz",
"lines": 102225,
"weight": 5
},
{
"name": "quora_duplicates_triplets.jsonl.gz",
"lines": 103663,
"weight": 5
},
{
"name": "stackexchange_title_body/salesforce.stackexchange.com.jsonl.gz",
"lines": 105260,
"weight": 5
},
{
"name": "stackexchange_title_body/english.stackexchange.com.jsonl.gz",
"lines": 109522,
"weight": 6
},
{
"name": "stackexchange_title_body/apple.stackexchange.com.jsonl.gz",
"lines": 110622,
"weight": 6
},
{
"name": "altlex.jsonl.gz",
"lines": 112696,
"weight": 6
},
{
"name": "stackexchange_title_body/mathoverflow.net.jsonl.gz",
"lines": 120851,
"weight": 6
},
{
"name": "wikihow.jsonl.gz",
"lines": 128542,
"weight": 6
},
{
"name": "stackexchange_title_body/gis.stackexchange.com.jsonl.gz",
"lines": 131000,
"weight": 7
},
{
"name": "stackexchange_title_body/electronics.stackexchange.com.jsonl.gz",
"lines": 143582,
"weight": 7
},
{
"name": "stackexchange_title_body/physics.stackexchange.com.jsonl.gz",
"lines": 173307,
"weight": 9
},
{
"name": "stackexchange_title_body/stats.stackexchange.com.jsonl.gz",
"lines": 173466,
"weight": 9
},
{
"name": "sentence-compression.jsonl.gz",
"lines": 180000,
"weight": 9
},
{
"name": "stackexchange_title_body/unix.stackexchange.com.jsonl.gz",
"lines": 185997,
"weight": 9
},
{
"name": "stackexchange_title_body/tex.stackexchange.com.jsonl.gz",
"lines": 202954,
"weight": 10
},
{
"name": "stackexchange_duplicate_questions_title-body_title-body.jsonl.gz",
"lines": 250460,
"weight": 12
},
{
"name": "stackexchange_duplicate_questions_body_body.jsonl.gz",
"lines": 250519,
"weight": 12
},
{
"name": "stackexchange_title_body/serverfault.com.jsonl.gz",
"lines": 270904,
"weight": 13
},
{
"name": "AllNLI.jsonl.gz",
"lines": 277230,
"weight": 13
},
{
"name": "stackexchange_duplicate_questions_title_title.jsonl.gz",
"lines": 304525,
"weight": 15
},
{
"name": "eli5_question_answer.jsonl.gz",
"lines": 325475,
"weight": 16
},
{
"name": "specter_train_triples.jsonl.gz",
"lines": 684100,
"weight": 16
},
{
"name": "stackexchange_title_body/askubuntu.com.jsonl.gz",
"lines": 347925,
"weight": 17
},
{
"name": "stackexchange_title_body/superuser.com.jsonl.gz",
"lines": 435463,
"weight": 21
},
{
"name": "stackexchange_title_body/small_stackexchanges.jsonl.gz",
"lines": 448146,
"weight": 21
},
{
"name": "S2ORC_title_abstract.jsonl.gz",
"lines": 41769185,
"weight": 23
},
{
"name": "S2ORC_citation_pairs.jsonl.gz",
"lines": 52603982,
"weight": 12
},
{
"name": "S2ORC_citation_pairs_abstract.jsonl.gz",
"lines": 116288806,
"weight": 12
},
{
"name": "PAQ_pairs.jsonl.gz",
"lines": 64371441,
"weight": 23
},
{
"name": "WikiAnswers_pairs.jsonl.gz",
"lines": 77427422,
"weight": 23
},
{
"name": "searchQA_question_top5_snippets_merged.jsonl.gz",
"lines": 582261,
"weight": 28
},
{
"name": "yahoo_answers_title_question.jsonl.gz",
"lines": 659896,
"weight": 31
},
{
"name": "yahoo_answers_question_answer.jsonl.gz",
"lines": 681164,
"weight": 32
},
{
"name": "yahoo_answers_title_answer.jsonl.gz",
"lines": 1198260,
"weight": 47
},
{
"name": "stackexchange_title_body/math.stackexchange.com.jsonl.gz",
"lines": 1338443,
"weight": 47
},
{
"name": "gooaq_pairs.jsonl.gz",
"lines": 3012496,
"weight": 47
},
{
"name": "msmarco-query_passage_negative.jsonl.gz",
"lines": 9144553,
"weight": 47
},
{
"name": "stackexchange_title_body/stackoverflow.com-Posts.jsonl.gz",
"lines": 18562443,
"weight": 47
},
{"name": "reddit/reddit_2015.jsonl.gz", "weight": 50},
{"name": "reddit/reddit_2016.jsonl.gz", "weight": 50},
{"name": "reddit/reddit_2017.jsonl.gz", "weight": 50},
{"name": "reddit/reddit_2018.jsonl.gz", "weight": 50}
]