Weyaxi committed on
Commit
d8ee461
1 Parent(s): e98e438

adding the exact data used to train this model

Browse files
.gitattributes CHANGED
@@ -34,3 +34,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  einstein-v4-7b.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  einstein-v4-7b.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
37
+ data/airoboros_3.2_without_contextual_slimorca_orca_sharegpt.json filter=lfs diff=lfs merge=lfs -text
38
+ data/capybara_sharegpt.json filter=lfs diff=lfs merge=lfs -text
39
+ data/cot_alpaca_gpt4_extracted_openhermes_2.5_sharegpt.json filter=lfs diff=lfs merge=lfs -text
40
+ data/merged_all.json filter=lfs diff=lfs merge=lfs -text
41
+ data/slimorca_dedup_filtered_95k_sharegpt.json filter=lfs diff=lfs merge=lfs -text
42
+ data/synthia-v1.3_sharegpt_12500.json filter=lfs diff=lfs merge=lfs -text
data/airoboros_3.2_without_contextual_slimorca_orca_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22b3140cce72bfaad2ae423c2c9bafd9ce128cf7820e8be3b9f6d415390c5689
3
+ size 89066312
data/capybara_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1947d28999416a2f468d1e09654cfdfc9bab8ccd03aa184598d20f0000dd6e4
3
+ size 76361785
data/cot_alpaca_gpt4_extracted_openhermes_2.5_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a037af5bf62d30414b85d036c09c0f860922f66c3e7fd701abf809f7fc94c32
3
+ size 40074062
data/merged_all.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85e34219effaa00e2962d6acff3697a21e5ef86fc7b899e5732d5359d8866b26
3
+ size 582406346
data/remove_empty_output.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ with open('merged_all.json', 'r') as file:
4
+ data = json.load(file)
5
+
6
+ print(f"Normal len: {len(data)}")
7
+
8
+ data = [row for row in data if row["output"] != ""]
9
+
10
+ print(f"After len: {len(data)}")
11
+
12
+ with open('merged_all.json', 'w') as file:
13
+ json.dump(data, file, indent=1)
data/slimorca_dedup_filtered_95k_sharegpt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302e8d78b1f5f08bb7dd0ab7ded0204935003aea0b4c5bdbd8821d8924ab15f8
3
+ size 227955996
data/synthia-v1.3_sharegpt_12500.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbdbc7413a3c7fc65a900518f0db8627bb5ced53e1e8ee82613d09856c1b3b70
3
+ size 30638009