Fernando Carneiro committed on
Commit
1332e64
1 Parent(s): 07fab57
Files changed (1) hide show
  1. README.md +6 -0
README.md CHANGED
@@ -30,9 +30,15 @@ with torch.no_grad():
30
  ### Normalize raw input Tweets
31
 
32
  ```python
 
33
  import torch
34
  from transformers import AutoModel, AutoTokenizer
35
 
 
 
 
 
 
36
 
37
  ```python
38
  from transformers import pipeline
 
30
  ### Normalize raw input Tweets
31
 
32
  ```python
33
+ from emoji import demojize
34
  import torch
35
  from transformers import AutoModel, AutoTokenizer
36
 
37
+ model = AutoModel.from_pretrained('melll-uff/bertweetbr')
38
+ tokenizer = AutoTokenizer.from_pretrained('melll-uff/bertweetbr', normalization=False)
39
+
40
+ tokenizer.demojizer = lambda x: demojize(x, language='pt')
41
+ ```
42
 
43
  ```python
44
  from transformers import pipeline