ageng-anugrah committed on
Commit
a2afebd
1 Parent(s): 016cba2

modify model: load with AutoModelForTokenClassification, move NER label names into config.json, update weights

Browse files
Files changed (3) hide show
  1. README.md +7 -14
  2. config.json +14 -14
  3. pytorch_model.bin +1 -1
README.md CHANGED
@@ -9,31 +9,24 @@ tags:
9
 
10
  ### Load model and tokenizer
11
  ```python
12
- from transformers import AutoTokenizer, AutoModel
13
 
14
  tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
15
- model = AutoModel.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
16
  ```
17
 
18
  ### Extract NER Tag
19
  ```python
20
  import torch
21
  def predict(model, tokenizer, sentence):
22
- # will be moved to config later
23
- ids_to_labels = {
24
- 0: 'B-ORGANISATION',
25
- 1: 'B-PERSON',
26
- 2: 'B-PLACE',
27
- 3: 'I-ORGANISATION',
28
- 4: 'I-PERSON',
29
- 5: 'I-PLACE',
30
- 6: 'O',
31
- }
32
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
33
  inputs = tokenizer(sentence.split(),
34
  is_split_into_words = True,
35
  return_offsets_mapping=True,
36
- return_tensors="pt")
 
 
 
37
 
38
  model.to(device)
39
  # move to the selected device (GPU if available, otherwise CPU)
@@ -48,7 +41,7 @@ def predict(model, tokenizer, sentence):
48
  flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size*seq_len,) - predictions at the token level
49
 
50
  tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
51
- token_predictions = [ids_to_labels[i] for i in flattened_predictions.cpu().numpy()]
52
  wp_preds = list(zip(tokens, token_predictions)) # list of tuples. Each tuple = (wordpiece, prediction)
53
 
54
  prediction = []
 
9
 
10
  ### Load model and tokenizer
11
  ```python
12
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
13
 
14
  tokenizer = AutoTokenizer.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
15
+ model = AutoModelForTokenClassification.from_pretrained("ageng-anugrah/indobert-large-p2-finetuned-ner")
16
  ```
17
 
18
  ### Extract NER Tag
19
  ```python
20
  import torch
21
  def predict(model, tokenizer, sentence):
 
 
 
 
 
 
 
 
 
 
22
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23
  inputs = tokenizer(sentence.split(),
24
  is_split_into_words = True,
25
  return_offsets_mapping=True,
26
+ return_tensors="pt",
27
+ padding='max_length',
28
+ truncation=True,
29
+ max_length=512)
30
 
31
  model.to(device)
32
  # move to the selected device (GPU if available, otherwise CPU)
 
41
  flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size*seq_len,) - predictions at the token level
42
 
43
  tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())
44
+ token_predictions = [model.config.id2label[i] for i in flattened_predictions.cpu().numpy()]
45
  wp_preds = list(zip(tokens, token_predictions)) # list of tuples. Each tuple = (wordpiece, prediction)
46
 
47
  prediction = []
config.json CHANGED
@@ -11,24 +11,24 @@
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
  "id2label": {
14
- "0": "LABEL_0",
15
- "1": "LABEL_1",
16
- "2": "LABEL_2",
17
- "3": "LABEL_3",
18
- "4": "LABEL_4",
19
- "5": "LABEL_5",
20
- "6": "LABEL_6"
21
  },
22
  "initializer_range": 0.02,
23
  "intermediate_size": 4096,
24
  "label2id": {
25
- "LABEL_0": 0,
26
- "LABEL_1": 1,
27
- "LABEL_2": 2,
28
- "LABEL_3": 3,
29
- "LABEL_4": 4,
30
- "LABEL_5": 5,
31
- "LABEL_6": 6
32
  },
33
  "layer_norm_eps": 1e-12,
34
  "max_position_embeddings": 512,
 
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
  "id2label": {
14
+ "0": "B-ORGANISATION",
15
+ "1": "B-PERSON",
16
+ "2": "B-PLACE",
17
+ "3": "I-ORGANISATION",
18
+ "4": "I-PERSON",
19
+ "5": "I-PLACE",
20
+ "6": "O"
21
  },
22
  "initializer_range": 0.02,
23
  "intermediate_size": 4096,
24
  "label2id": {
25
+ "B-ORGANISATION": 0,
26
+ "B-PERSON": 1,
27
+ "B-PLACE": 2,
28
+ "I-ORGANISATION": 3,
29
+ "I-PERSON": 4,
30
+ "I-PLACE": 5,
31
+ "O": 6
32
  },
33
  "layer_norm_eps": 1e-12,
34
  "max_position_embeddings": 512,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:393718ff7be5d13f68dd695793eefcf0273e9bcf6dee91d6613989cdb96072a5
3
  size 1336536045
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c763564e134825e0b325b98415ec3dfaab39d4e8bbd1644372a94489050ade21
3
  size 1336536045