Weyaxi committed on
Commit
2fb8b40
1 Parent(s): e5bf898

adding the exact data used to train this model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/airoboros_3.2_without_contextual_slimorca_orca_sharegpt.json filter=lfs diff=lfs merge=lfs -text
37
+ data/allenai_wild_chat_gpt4_english_toxic_random_half_4k_sharegpt.json filter=lfs diff=lfs merge=lfs -text
38
+ data/capybara_sharegpt.json filter=lfs diff=lfs merge=lfs -text
39
+ data/cot_alpaca_gpt4_extracted_openhermes_2.5_sharegpt.json filter=lfs diff=lfs merge=lfs -text
40
+ data/gpt4_data_lmys_1m_sharegpt.json filter=lfs diff=lfs merge=lfs -text
41
+ data/gpteacher-instruct-special-alpaca.json filter=lfs diff=lfs merge=lfs -text
42
+ data/merged_all.json filter=lfs diff=lfs merge=lfs -text
43
+ data/no_robots_sharegpt.json filter=lfs diff=lfs merge=lfs -text
44
+ data/pippa_bagel_repo_3k_sharegpt.json filter=lfs diff=lfs merge=lfs -text
45
+ data/sharegpt_gpt4_english.json filter=lfs diff=lfs merge=lfs -text
46
+ data/slimorca_dedup_filtered_95k_sharegpt.json filter=lfs diff=lfs merge=lfs -text
47
+ data/synthia-v1.3_sharegpt_12500.json filter=lfs diff=lfs merge=lfs -text
48
+ data/wizardlm_evol_instruct_70k_random_half.json filter=lfs diff=lfs merge=lfs -text
data/airoboros_3.2_without_contextual_slimorca_orca_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b3140cce72bfaad2ae423c2c9bafd9ce128cf7820e8be3b9f6d415390c5689
3
+ size 89066312
data/allenai_wild_chat_gpt4_english_toxic_random_half_4k_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d82b9c5d276a699b3712b42d08b34de4ce334ab06ce185f3e55ef25a2e933852
3
+ size 41890772
data/capybara_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1947d28999416a2f468d1e09654cfdfc9bab8ccd03aa184598d20f0000dd6e4
3
+ size 76361785
data/cot_alpaca_gpt4_extracted_openhermes_2.5_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a037af5bf62d30414b85d036c09c0f860922f66c3e7fd701abf809f7fc94c32
3
+ size 40074062
data/everythinglm-data-v3_sharegpt.json ADDED
The diff for this file is too large to render. See raw diff
 
data/gpt4_data_lmys_1m_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39a31e3af56bb53e7c723faf0d0bc8b838091bdbed0eabcd0de881f9b4f8c2a9
3
+ size 41647312
data/gpteacher-instruct-special-alpaca.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:598d08b29655a0da79c9c1b290431c4d22d0533231a29cb048d1056e19d95c97
3
+ size 12187144
data/merged_all.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ddc1528923e569d091d249e43866b0b3c8486fe6723a0c9431f613ec4b9f91
3
+ size 662813228
data/no_robots_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10627aef1c0b64006baee1d39fb087669ae465730fce2046af6762e692897c23
3
+ size 13110374
data/oasst_top1_from_fusechatmixture_sharegpt.json ADDED
The diff for this file is too large to render. See raw diff
 
data/pippa_bagel_repo_3k_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df2121d98d19e3e1e0fc873390486df71c2f502f309eaafd5af45f3c151cfe4f
3
+ size 18361804
data/remove_empty_output.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Drop rows whose ``"output"`` field is an empty string from a JSON dataset.

Run as a script, this rewrites ``merged_all.json`` (resolved against the
current working directory) in place, printing the row count before and
after filtering.
"""
import json
import os


def remove_empty_output(path='merged_all.json'):
    """Remove rows with an empty ``"output"`` from the JSON file at *path*.

    The file must contain a JSON array of objects, each of which has an
    ``"output"`` key. Only rows whose ``"output"`` is exactly ``""`` are
    dropped; any other value (including whitespace-only strings) is kept.

    Args:
        path: Location of the JSON file to filter in place.

    Returns:
        The list of rows that were kept and written back.

    Raises:
        KeyError: If a row has no ``"output"`` key.
        OSError: If the file cannot be read or written.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    print(f"Normal len: {len(data)}")

    data = [row for row in data if row["output"] != ""]

    print(f"After len: {len(data)}")

    # Write to a sibling temp file and swap it in only after the dump has
    # fully succeeded; opening *path* with 'w' directly would truncate the
    # original before the new content is safely on disk.
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=1)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only cleans up a partial file left by a failed dump.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return data


if __name__ == "__main__":
    remove_empty_output()
data/sharegpt_gpt4_english.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1512057e9002710757d6d8478b6678138fd878ac5844866d602b7cb7fd3e9c41
3
+ size 78552993
data/slimorca_dedup_filtered_95k_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302e8d78b1f5f08bb7dd0ab7ded0204935003aea0b4c5bdbd8821d8924ab15f8
3
+ size 227955996
data/synthia-v1.3_sharegpt_12500.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbdbc7413a3c7fc65a900518f0db8627bb5ced53e1e8ee82613d09856c1b3b70
3
+ size 30638009
data/wizardlm_evol_instruct_70k_random_half.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41512805955ac47820191fd307a7eea1123dc1f163332f545137cee3182153b4
3
+ size 62604075