KomeijiForce
committed on
Commit
•
5f98890
1
Parent(s):
d012c09
Update README.md
Browse files
README.md
CHANGED
@@ -10,45 +10,48 @@ model-index:
|
|
10 |
|
11 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
12 |
should probably proofread and complete it, then remove this comment. -->
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
10 |
|
11 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
12 |
should probably proofread and complete it, then remove this comment. -->
|
13 |
+
```python
|
14 |
+
from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch

# Run inference on the first CUDA device.
# NOTE(review): assumes a GPU is available — TODO confirm, or fall back to CPU.
device = torch.device("cuda:0")

# Hub repository id of the MetaIE token-classification model.
# (Was an f-string with no placeholders; the `f` prefix was extraneous.)
path = "KomeijiForce/roberta-large-metaie"
tokenizer = AutoTokenizer.from_pretrained(path)
tagger = AutoModelForTokenClassification.from_pretrained(path).to(device)
|
21 |
+
|
22 |
+
def find_sequences(lst):
    """Return (start, end) index pairs of tagged spans in *lst*.

    A span begins at an element equal to 0 and extends over every
    immediately following element equal to 1 (BIO-style tags: 0 = begin,
    1 = inside).  The returned *end* is exclusive, so ``lst[start:end]``
    is the whole span.
    """
    spans = []
    pos = 0
    length = len(lst)
    while pos < length:
        if lst[pos] != 0:
            # Not a span start — skip ahead.
            pos += 1
            continue
        begin = pos
        pos += 1
        # Consume the run of "inside" tags that continue this span.
        while pos < length and lst[pos] == 1:
            pos += 1
        spans.append((begin, pos))
    return spans
|
37 |
+
|
38 |
+
def is_sublst(lst1, lst2):
    """Return True if *lst2* occurs as a contiguous sublist of *lst1*.

    An empty *lst2* is considered a sublist of anything, matching the
    original slice-comparison semantics.  Replaces the manual
    loop-and-early-return with the idiomatic ``any()`` over slice
    comparisons; behavior is unchanged.
    """
    n = len(lst2)
    return any(lst1[i:i + n] == lst2 for i in range(len(lst1) - n + 1))
|
43 |
+
|
44 |
+
# Example sentence, pre-tokenized into words.
words = ["John", "Smith", "loves", "his", "hometown", ",", "Los", "Angeles", "."]

# Each prefix acts as an extraction instruction prepended to the sentence.
for prefix in ["Person", "Location", "John Smith births in", "Positive opinion"]:

    sentence = " ".join([prefix, ":"] + words)

    inputs = tokenizer(sentence, return_tensors="pt").to(device)
    tag_predictions = tagger(**inputs).logits[0].argmax(-1)

    # Decode every predicted span, then keep only those that appear
    # verbatim (as a contiguous word run) in the original sentence.
    extracted = []
    for span in find_sequences(tag_predictions):
        text = tokenizer.decode(inputs.input_ids[0, span[0]:span[1]]).strip()
        if is_sublst(words, text.split()):
            extracted.append(text)

    print(prefix, extracted)
|
57 |
+
```
|