Update README.md
Browse files
README.md
CHANGED
@@ -38,7 +38,7 @@ def preprocess(text):
|
|
38 |
```python
|
39 |
from transformers import pipeline, AutoTokenizer
|
40 |
|
41 |
-
MODEL = "cardiffnlp/twitter-roberta-
|
42 |
fill_mask = pipeline("fill-mask", model=MODEL, tokenizer=MODEL)
|
43 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
44 |
|
@@ -65,25 +65,25 @@ Output:
|
|
65 |
```
|
66 |
------------------------------
|
67 |
So glad I'm <mask> vaccinated.
|
68 |
-
1) 0.
|
69 |
-
2) 0.
|
70 |
-
3) 0.
|
71 |
-
4) 0.
|
72 |
-
5) 0.
|
73 |
------------------------------
|
74 |
I keep forgetting to bring a <mask>.
|
75 |
-
1) 0.
|
76 |
-
2) 0.
|
77 |
-
3) 0.
|
78 |
-
4) 0.
|
79 |
-
5) 0.
|
80 |
------------------------------
|
81 |
Looking forward to watching <mask> Game tonight!
|
82 |
-
1) 0.
|
83 |
-
2) 0.
|
84 |
-
3) 0.
|
85 |
-
4) 0.
|
86 |
-
5) 0.
|
87 |
```
|
88 |
|
89 |
## Example Tweet Embeddings
|
@@ -101,7 +101,7 @@ def get_embedding(text): # naive approach for demonstration
|
|
101 |
return np.mean(features[0], axis=0)
|
102 |
|
103 |
|
104 |
-
MODEL = "cardiffnlp/twitter-roberta-
|
105 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
106 |
model = AutoModel.from_pretrained(MODEL)
|
107 |
|
@@ -126,10 +126,10 @@ Output:
|
|
126 |
```
|
127 |
Most similar to: The book was awesome
|
128 |
------------------------------
|
129 |
-
1) 0.
|
130 |
-
2) 0.
|
131 |
-
3) 0.
|
132 |
-
4) 0.
|
133 |
```
|
134 |
|
135 |
## Example Feature Extraction
|
@@ -138,7 +138,7 @@ Most similar to: The book was awesome
|
|
138 |
from transformers import AutoTokenizer, AutoModel, TFAutoModel
|
139 |
import numpy as np
|
140 |
|
141 |
-
MODEL = "cardiffnlp/twitter-roberta-
|
142 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
143 |
|
144 |
text = "Good night 😊"
|
|
|
38 |
```python
|
39 |
from transformers import pipeline, AutoTokenizer
|
40 |
|
41 |
+
MODEL = "cardiffnlp/twitter-roberta-large-2022-154m"
|
42 |
fill_mask = pipeline("fill-mask", model=MODEL, tokenizer=MODEL)
|
43 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
44 |
|
|
|
65 |
```
|
66 |
------------------------------
|
67 |
So glad I'm <mask> vaccinated.
|
68 |
+
1) 0.37136 fully
|
69 |
+
2) 0.20631 a
|
70 |
+
3) 0.09422 the
|
71 |
+
4) 0.07649 not
|
72 |
+
5) 0.04505 already
|
73 |
------------------------------
|
74 |
I keep forgetting to bring a <mask>.
|
75 |
+
1) 0.10507 mask
|
76 |
+
2) 0.05810 pen
|
77 |
+
3) 0.05142 charger
|
78 |
+
4) 0.04082 tissue
|
79 |
+
5) 0.03955 lighter
|
80 |
------------------------------
|
81 |
Looking forward to watching <mask> Game tonight!
|
82 |
+
1) 0.45783 The
|
83 |
+
2) 0.32842 the
|
84 |
+
3) 0.02705 Squid
|
85 |
+
4) 0.01157 Big
|
86 |
+
5) 0.00538 Match
|
87 |
```
|
88 |
|
89 |
## Example Tweet Embeddings
|
|
|
101 |
return np.mean(features[0], axis=0)
|
102 |
|
103 |
|
104 |
+
MODEL = "cardiffnlp/twitter-roberta-large-2022-154m"
|
105 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
106 |
model = AutoModel.from_pretrained(MODEL)
|
107 |
|
|
|
126 |
```
|
127 |
Most similar to: The book was awesome
|
128 |
------------------------------
|
129 |
+
1) 0.99820 The movie was great
|
130 |
+
2) 0.99306 Just finished reading 'Embeddings in NLP'
|
131 |
+
3) 0.99257 What time is the next game?
|
132 |
+
4) 0.98561 I just ordered fried chicken 🐣
|
133 |
```
|
134 |
|
135 |
## Example Feature Extraction
|
|
|
138 |
from transformers import AutoTokenizer, AutoModel, TFAutoModel
|
139 |
import numpy as np
|
140 |
|
141 |
+
MODEL = "cardiffnlp/twitter-roberta-large-2022-154m"
|
142 |
tokenizer = AutoTokenizer.from_pretrained(MODEL)
|
143 |
|
144 |
text = "Good night 😊"
|