marcev committed
Commit f858656
1 Parent(s): 47dc101

Update label mappings for FinanceBERT

anotherscript.py ADDED
@@ -0,0 +1,18 @@
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+ # Correctly formatted path using a raw string to prevent escape sequence errors
+ model_path = r'C:\Users\marco\financebert'
+
+ # Load the tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForSequenceClassification.from_pretrained(model_path)
+
+ # Update the model configuration with label mappings
+ model.config.id2label = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
+ model.config.label2id = {'Negative': 0, 'Neutral': 1, 'Positive': 2}
+
+ # Save the tokenizer and model with the updated configuration
+ tokenizer.save_pretrained(model_path)
+ model.save_pretrained(model_path)
+
+ print("Tokenizer and model saved with updated labels.")
config.json CHANGED
@@ -1,5 +1,5 @@
  {
-   "_name_or_path": "bert-base-uncased",
+   "_name_or_path": "C:\\Users\\marco\\financebert",
    "architectures": [
      "BertForSequenceClassification"
    ],
@@ -10,16 +10,16 @@
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
    "id2label": {
-     "0": "LABEL_0",
-     "1": "LABEL_1",
-     "2": "LABEL_2"
+     "0": "Negative",
+     "1": "Neutral",
+     "2": "Positive"
    },
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "label2id": {
-     "LABEL_0": 0,
-     "LABEL_1": 1,
-     "LABEL_2": 2
+     "Negative": 0,
+     "Neutral": 1,
+     "Positive": 2
    },
    "layer_norm_eps": 1e-12,
    "max_position_embeddings": 512,
model_update.py ADDED
@@ -0,0 +1,8 @@
+ from transformers import AutoModelForSequenceClassification
+
+ # Load your model
+ model = AutoModelForSequenceClassification.from_pretrained('path_to_your_local_model')
+
+ # Update label mapping
+ model.config.id2label = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
+ model.config.label2id = {'Negative': 0, 'Neutral': 1, 'Positive': 2}
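Note: model_update.py edits the mapping only in memory; without a save call the change is lost when the process exits (anotherscript.py above is the version that persists it). A minimal sketch of the missing step, keeping the script's placeholder path:

# Write the updated config (and weights) back to disk; assumes the
# placeholder path from model_update.py points at the local model folder
model.save_pretrained('path_to_your_local_model')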
script.py ADDED
@@ -0,0 +1,8 @@
+ import pickle
+
+ try:
+     with open(r'C:\Users\marco\financebert\model.safetensors', 'rb') as f:
+         model = pickle.load(f)
+     print("Model loaded successfully using pickle:", model)
+ except Exception as e:
+     print("Failed to load model using pickle:", str(e))
special_tokens_map.json CHANGED
@@ -1,7 +1,37 @@
  {
-   "cls_token": "[CLS]",
-   "mask_token": "[MASK]",
-   "pad_token": "[PAD]",
-   "sep_token": "[SEP]",
-   "unk_token": "[UNK]"
+   "cls_token": {
+     "content": "[CLS]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "[MASK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "[SEP]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
  }
tokenizer_config.json CHANGED
@@ -45,11 +45,18 @@
    "cls_token": "[CLS]",
    "do_lower_case": true,
    "mask_token": "[MASK]",
+   "max_length": 512,
    "model_max_length": 512,
+   "pad_to_multiple_of": null,
    "pad_token": "[PAD]",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
    "sep_token": "[SEP]",
+   "stride": 0,
    "strip_accents": null,
    "tokenize_chinese_chars": true,
    "tokenizer_class": "BertTokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
    "unk_token": "[UNK]"
  }
your_script_name.py ADDED
@@ -0,0 +1,24 @@
+ import torch
+
+ model_path = r'C:\Users\marco\financebert\model.safetensors'
+ try:
+     # Try loading the model directly
+     model = torch.load(model_path)
+     print("Model loaded successfully:", model)
+ except Exception as e:
+     print("Failed to load the model directly:", str(e))
+
+ # If direct loading fails, consider that the file might need handling of specific layers or configs
+ try:
+     # Sometimes models are wrapped in a dictionary or other structures
+     model_data = torch.load(model_path, map_location=torch.device('cpu'))
+     print("Model data loaded, attempt to extract model:", model_data.keys())
+
+     # If the model is under a specific key or requires further processing
+     if 'model' in model_data:
+         model = model_data['model']
+         print("Extracted model from dictionary:", model)
+     else:
+         print("Check the keys in model_data and adjust accordingly")
+ except Exception as e2:
+     print("Failed in adjusted loading approach:", str(e2))