felipemaiapolo committed on
Commit
c63c45a
1 Parent(s): 6f824d1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +39 -19
README.md CHANGED
@@ -31,28 +31,48 @@ model = AutoModelForPreTraining.from_pretrained('felipemaiapolo/legalnlp-bert')
31
  tokenizer = AutoTokenizer.from_pretrained('felipemaiapolo/legalnlp-bert', do_lower_case=False)
32
  ```
33
 
34
- ### Ex. extracting BERT embeddings
 
 
 
 
 
 
 
 
 
 
35
 
36
  ```python
37
- import torch
38
-
39
- model = AutoModel.from_pretrained('felipemaiapolo/legalnlp-bert')
40
- input_ids = tokenizer.encode('Tinha uma pedra no meio do caminho.', return_tensors='pt')
41
-
42
- with torch.no_grad():
43
- outs = model(input_ids)
44
- encoded = outs[0][0, 1:-1] # Ignore [CLS] and [SEP] special tokens
45
-
46
- # encoded.shape: (8, 768)
47
- # tensor([[-0.0398, -0.3057, 0.2431, ..., -0.5420, 0.1857, -0.5775],
48
- # [-0.2926, -0.1957, 0.7020, ..., -0.2843, 0.0530, -0.4304],
49
- # [ 0.2463, -0.1467, 0.5496, ..., 0.3781, -0.2325, -0.5469],
50
- # ...,
51
- # [ 0.0662, 0.7817, 0.3486, ..., -0.4131, -0.2852, -0.2819],
52
- # [ 0.0662, 0.2845, 0.1871, ..., -0.2542, -0.2933, -0.0661],
53
- # [ 0.2761, -0.1657, 0.3288, ..., -0.2102, 0.0029, -0.2009]])
 
 
 
 
 
 
 
 
 
54
  ```
55
-
 
56
  # References
57
 
58
  [1] Polo, Felipe Maia, et al. "LegalNLP-Natural Language Processing methods for the Brazilian Legal Language." Anais do XVIII Encontro Nacional de Inteligência Artificial e Computacional. SBC, 2021.
 
31
  tokenizer = AutoTokenizer.from_pretrained('felipemaiapolo/legalnlp-bert', do_lower_case=False)
32
  ```
33
 
34
+
35
+ ### Ex. BERT embeddings
36
+
37
+ ```python
38
+ from transformers import pipeline
39
+
40
+ pipe = pipeline("feature-extraction", model='felipemaiapolo/legalnlp-bert')
41
+ encoded_sentence = pipe('Juíz negou o recurso.')
42
+ ```
43
+
44
+ ### Masked language modeling prediction example
45
 
46
  ```python
47
+ from transformers import pipeline
48
+
49
+ pipe = pipeline('fill-mask', model=model, tokenizer=tokenizer)
50
+
51
+ pipe('Juíz negou o [MASK].')
52
+ # [{'score': 0.6387444734573364,
53
+ # 'token': 7608,
54
+ # 'token_str': 'julgamento',
55
+ # 'sequence': 'juiz negou o julgamento.'},
56
+ # {'score': 0.09632532298564911,
57
+ # 'token': 7509,
58
+ # 'token_str': 'voto',
59
+ # 'sequence': 'juiz negou o voto.'},
60
+ # {'score': 0.06424401700496674,
61
+ # 'token': 17225,
62
+ # 'token_str': 'julgado',
63
+ # 'sequence': 'juiz negou o julgado.'},
64
+ # {'score': 0.05929475650191307,
65
+ # 'token': 8190,
66
+ # 'token_str': 'recurso',
67
+ # 'sequence': 'juiz negou o recurso.'},
68
+ # {'score': 0.011442390270531178,
69
+ # 'token': 6330,
70
+ # 'token_str': 'registro',
71
+ # 'sequence': 'juiz negou o registro.'}]
72
+
73
  ```
74
+
75
+
76
  # References
77
 
78
  [1] Polo, Felipe Maia, et al. "LegalNLP-Natural Language Processing methods for the Brazilian Legal Language." Anais do XVIII Encontro Nacional de Inteligência Artificial e Computacional. SBC, 2021.