harsh4248 committed on
Commit 8d69fcf
1 Parent(s): 6d93b54

Training in progress epoch 0

Files changed (3):
  1. README.md +6 -8
  2. config.json +10 -126
  3. tf_model.h5 +2 -2
README.md CHANGED
@@ -15,10 +15,10 @@ probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss: 0.8300
-- Validation Loss: 1.1957
-- Train Accuracy: 0.6026
-- Epoch: 2
+- Train Loss: 0.2969
+- Validation Loss: 0.1415
+- Train Accuracy: 0.9552
+- Epoch: 0
 
 ## Model description
 
@@ -37,16 +37,14 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 70665, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False}
+- optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 6560, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False}
 - training_precision: float32
 
 ### Training results
 
 | Train Loss | Validation Loss | Train Accuracy | Epoch |
 |:----------:|:---------------:|:--------------:|:-----:|
-| 1.3238     | 1.3084          | 0.5420         | 0     |
-| 1.0170     | 1.1609          | 0.5882         | 1     |
-| 0.8300     | 1.1957          | 0.6026         | 2     |
+| 0.2969     | 0.1415          | 0.9552         | 0     |
 
 
 ### Framework versions
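
Editor's note: the serialized optimizer above is plain Keras Adam driving a `PolynomialDecay` schedule with `power=1.0`, i.e. a linear ramp from 2e-05 down to 0 over 6560 steps (typically steps_per_epoch × num_epochs). A minimal sketch of rebuilding it in TensorFlow, using only the values logged in the new card:

```python
import tensorflow as tf

# Linear learning-rate decay: PolynomialDecay with power=1.0 is a
# straight line from 2e-5 to 0 over the whole training run.
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=2e-5,
    decay_steps=6560,        # total optimizer steps for this run
    end_learning_rate=0.0,
    power=1.0,
    cycle=False,
)

# Adam with the logged defaults; weight decay and gradient clipping unset.
optimizer = tf.keras.optimizers.Adam(
    learning_rate=lr_schedule,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-8,
    amsgrad=False,
)
```

This matches what `transformers.create_optimizer(init_lr=2e-5, num_train_steps=6560, num_warmup_steps=0)` builds when warmup and weight decay are disabled, which is the usual source of such auto-generated cards.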
config.json CHANGED
@@ -9,135 +9,19 @@
   "dropout": 0.1,
   "hidden_dim": 3072,
   "id2label": {
-    "0": "Bloom-7B",
-    "1": "Claude-Instant-v1",
-    "2": "Claude-v1",
-    "3": "Cohere-Command",
-    "4": "Dolphin-2.5-Mixtral-8x7B",
-    "5": "Dolphin-Mixtral-8x7B",
-    "6": "Falcon-180B",
-    "7": "Flan-T5-Base",
-    "8": "Flan-T5-Large",
-    "9": "Flan-T5-Small",
-    "10": "Flan-T5-XL",
-    "11": "Flan-T5-XXL",
-    "12": "GLM-130B",
-    "13": "GPT-3.5",
-    "14": "GPT-4",
-    "15": "GPT-J",
-    "16": "GPT-NeoX",
-    "17": "Gemini-Pro",
-    "18": "Goliath-120B",
-    "19": "Human",
-    "20": "LLaMA-13B",
-    "21": "LLaMA-2-70B",
-    "22": "LLaMA-2-7B",
-    "23": "LLaMA-30B",
-    "24": "LLaMA-65B",
-    "25": "LLaMA-7B",
-    "26": "LZLV-70B",
-    "27": "Mistral-7B",
-    "28": "Mistral-7B-OpenOrca",
-    "29": "Mixtral-8x7B",
-    "30": "MythoMax-L2-13B",
-    "31": "Neural-Chat-7B",
-    "32": "Noromaid-20B",
-    "33": "Nous-Capybara-34B",
-    "34": "Nous-Capybara-7B",
-    "35": "Nous-Hermes-LLaMA-2-13B",
-    "36": "Nous-Hermes-LLaMA-2-70B",
-    "37": "OPT-1.3B",
-    "38": "OPT-125M",
-    "39": "OPT-13B",
-    "40": "OPT-2.7B",
-    "41": "OPT-30B",
-    "42": "OPT-350M",
-    "43": "OPT-6.7B",
-    "44": "OpenChat-3.5",
-    "45": "OpenHermes-2-Mistral-7B",
-    "46": "OpenHermes-2.5-Mistral-7B",
-    "47": "PaLM-2",
-    "48": "Psyfighter-13B",
-    "49": "Psyfighter-2-13B",
-    "50": "RWKV-5-World-3B",
-    "51": "StripedHyena-Nous-7B",
-    "52": "T0-11B",
-    "53": "T0-3B",
-    "54": "Text-Ada-001",
-    "55": "Text-Babbage-001",
-    "56": "Text-Curie-001",
-    "57": "Text-Davinci-001",
-    "58": "Text-Davinci-002",
-    "59": "Text-Davinci-003",
-    "60": "Toppy-M-7B",
-    "61": "Unknown",
-    "62": "YI-34B"
+    "0": "GPT-3.5",
+    "1": "GPT-4",
+    "2": "Human",
+    "3": "OpenChat-3.5",
+    "4": "Unknown"
   },
   "initializer_range": 0.02,
   "label2id": {
-    "Bloom-7B": 0,
-    "Claude-Instant-v1": 1,
-    "Claude-v1": 2,
-    "Cohere-Command": 3,
-    "Dolphin-2.5-Mixtral-8x7B": 4,
-    "Dolphin-Mixtral-8x7B": 5,
-    "Falcon-180B": 6,
-    "Flan-T5-Base": 7,
-    "Flan-T5-Large": 8,
-    "Flan-T5-Small": 9,
-    "Flan-T5-XL": 10,
-    "Flan-T5-XXL": 11,
-    "GLM-130B": 12,
-    "GPT-3.5": 13,
-    "GPT-4": 14,
-    "GPT-J": 15,
-    "GPT-NeoX": 16,
-    "Gemini-Pro": 17,
-    "Goliath-120B": 18,
-    "Human": 19,
-    "LLaMA-13B": 20,
-    "LLaMA-2-70B": 21,
-    "LLaMA-2-7B": 22,
-    "LLaMA-30B": 23,
-    "LLaMA-65B": 24,
-    "LLaMA-7B": 25,
-    "LZLV-70B": 26,
-    "Mistral-7B": 27,
-    "Mistral-7B-OpenOrca": 28,
-    "Mixtral-8x7B": 29,
-    "MythoMax-L2-13B": 30,
-    "Neural-Chat-7B": 31,
-    "Noromaid-20B": 32,
-    "Nous-Capybara-34B": 33,
-    "Nous-Capybara-7B": 34,
-    "Nous-Hermes-LLaMA-2-13B": 35,
-    "Nous-Hermes-LLaMA-2-70B": 36,
-    "OPT-1.3B": 37,
-    "OPT-125M": 38,
-    "OPT-13B": 39,
-    "OPT-2.7B": 40,
-    "OPT-30B": 41,
-    "OPT-350M": 42,
-    "OPT-6.7B": 43,
-    "OpenChat-3.5": 44,
-    "OpenHermes-2-Mistral-7B": 45,
-    "OpenHermes-2.5-Mistral-7B": 46,
-    "PaLM-2": 47,
-    "Psyfighter-13B": 48,
-    "Psyfighter-2-13B": 49,
-    "RWKV-5-World-3B": 50,
-    "StripedHyena-Nous-7B": 51,
-    "T0-11B": 52,
-    "T0-3B": 53,
-    "Text-Ada-001": 54,
-    "Text-Babbage-001": 55,
-    "Text-Curie-001": 56,
-    "Text-Davinci-001": 57,
-    "Text-Davinci-002": 58,
-    "Text-Davinci-003": 59,
-    "Toppy-M-7B": 60,
-    "Unknown": 61,
-    "YI-34B": 62
+    "GPT-3.5": 0,
+    "GPT-4": 1,
+    "Human": 2,
+    "OpenChat-3.5": 3,
+    "Unknown": 4
   },
   "max_position_embeddings": 512,
   "model_type": "distilbert",
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c002a25600ffc1788805c8e25ee623be3bc7a812824a952de41a09f408905e89
-size 268139200
+oid sha256:b4867fac077a22d478c876995d6784762360cd11d8aa0c276d98a091ac93fb40
+size 267961024
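
Editor's note: the tf_model.h5 entry is a Git LFS pointer (object hash plus byte size), not the weights themselves; the ~178 KB shrink is roughly what swapping the 63-way classification head for a 5-way one saves. A sketch, assuming the standard huggingface_hub client, of resolving the pointer to the actual file:

```python
from huggingface_hub import hf_hub_download

# Fetches the resolved binary (~268 MB), not the LFS pointer text above.
weights_path = hf_hub_download(
    repo_id="<this-repo-id>",  # placeholder for this repository's Hub id
    filename="tf_model.h5",
)
print(weights_path)
```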