Update README.md
Browse files
README.md
CHANGED
@@ -37,46 +37,52 @@ license: cc-by-nc-4.0
|
|
37 |
import torch
|
38 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
|
39 |
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
47 |
|
48 |
# gpu or cpu
|
49 |
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
50 |
model.to(device)
|
51 |
model.eval()
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
truncation=True,
|
58 |
-
padding="max_length",
|
59 |
-
return_token_type_ids=False,
|
60 |
-
return_offsets_mapping=True
|
61 |
-
)
|
62 |
-
encodings = {key: torch.tensor([val]) for key, val in encodings.items()}
|
63 |
-
input_ids = encodings["input_ids"].to(device)
|
64 |
-
attention_mask = encodings["attention_mask"].to(device)
|
65 |
-
offset_mappings = encodings["offset_mapping"].to(device)
|
66 |
-
|
67 |
-
# Predict
|
68 |
-
pred = model(input_ids, attention_mask=attention_mask)
|
69 |
-
start_logits, end_logits = pred.start_logits, pred.end_logits
|
70 |
-
token_start_index, token_end_index = start_logits.argmax(dim=-1), end_logits.argmax(dim=-1)
|
71 |
-
pred_ids = input_ids[0][token_start_index: token_end_index + 1]
|
72 |
-
|
73 |
-
# Answer start/end offset of context.
|
74 |
-
answer_start_offset = int(offset_mappings[0][token_start_index][0][0])
|
75 |
-
answer_end_offset = int(offset_mappings[0][token_end_index][0][1])
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
print(
|
|
|
80 |
```
|
81 |
|
82 |
## Citing & Authors
|
37 |
import torch
|
38 |
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
|
39 |
|
40 |
+
def predict_answer(qa_text_pair):
|
41 |
+
# Encoding
|
42 |
+
encodings = tokenizer(
|
43 |
+
qa_text_pair['question'], qa_text_pair['context'],
|
44 |
+
max_length=512,
|
45 |
+
truncation=True,
|
46 |
+
padding="max_length",
|
47 |
+
return_token_type_ids=False,
|
48 |
+
return_offsets_mapping=True
|
49 |
+
)
|
50 |
+
encodings = {key: torch.tensor([val]).to(device) for key, val in encodings.items()}
|
51 |
+
# Predict
|
52 |
+
with torch.no_grad():
|
53 |
+
pred = model(encodings['input_ids'], encodings['attention_mask'])
|
54 |
+
start_logits, end_logits = pred.start_logits, pred.end_logits
|
55 |
+
token_start_index, token_end_index = start_logits.argmax(dim=-1), end_logits.argmax(dim=-1)
|
56 |
+
pred_ids = encodings['input_ids'][0][token_start_index: token_end_index + 1]
|
57 |
+
# Answer start/end offset of context.
|
58 |
+
answer_start_offset = int(encodings['offset_mapping'][0][token_start_index][0][0])
|
59 |
+
answer_end_offset = int(encodings['offset_mapping'][0][token_end_index][0][1])
|
60 |
+
answer_offset = (answer_start_offset, answer_end_offset)
|
61 |
+
# Decoding
|
62 |
+
answer_text = tokenizer.decode(pred_ids) # text
|
63 |
+
del encodings
|
64 |
+
return {'answer_text':answer_text, 'answer_offset':answer_offset}
|
65 |
|
66 |
+
|
67 |
+
# Load fine-tuned MRC model
|
68 |
+
MODEL_PATH = "bespin-global/klue-bert-base-aihub-mrc"
|
69 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
|
70 |
+
model = AutoModelForQuestionAnswering.from_pretrained(MODEL_PATH)
|
71 |
|
72 |
# gpu or cpu
|
73 |
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
74 |
model.to(device)
|
75 |
model.eval()
|
76 |
|
77 |
+
context = '''์ ํ M1(์์ด: Apple M1)์ ์ ํ์ด ์์ฌ์ ๋งคํจํ ์ ์ปดํจํฐ์ฉ์ผ๋ก ์ค๊ณํ ์ต์ด์ ARM ๊ธฐ๋ฐ SoC์ด๋ค.
|
78 |
+
4์ธ๋ ๋งฅ๋ถ ์์ด, 5์ธ๋ ๋งฅ ๋ฏธ๋, 13์ธ์น 5์ธ๋ ๋งฅ๋ถ ํ๋ก, 5์ธ๋ ์์ดํจ๋ ํ๋ก์ ์ ๋ณด์๋ค. 5๋๋
ธ๋ฏธํฐ ๊ณต์ ์ ์ฌ์ฉํ์ฌ ์ ์กฐ๋ ์ต์ด์ ๊ฐ์ธ์ฉ ์ปดํจํฐ ์นฉ์ด๋ค.
|
79 |
+
์ ํ์ ์ ์ ๋ ฅ ์ค๋ฆฌ์ฝ์, ์ธ๊ณ์์ ๊ฐ์ฅ ๋น ๋ฅธ ARM ๊ธฐ๋ฐ์ ์ค์ ์ฒ๋ฆฌ ์ฅ์น(CPU) ์ฝ์ด, ๊ทธ๋ฆฌ๊ณ ์ธ๊ณ ์ต๊ณ ์ CPU ์ฑ๋ฅ ๋ ์ํธ๋ฅผ ๊ฐ์ถ๊ณ ์๋ค๊ณ ์ฃผ์ฅํ๊ณ ์๋ค.'''
|
80 |
+
question = "์ ํ์ด m1์ ๋ํด ์ฃผ์ฅํ๋๊ฑด ๋ญ์ผ?"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
+
qa_text_pair = {'context':context, 'question':question}
|
83 |
+
result = predict_answer(qa_text_pair)
|
84 |
+
print('Answer Text: ', result['answer_text']) # ์ ์ ๋ ฅ ์ค๋ฆฌ์ฝ์, ์ธ๊ณ์์ ๊ฐ์ฅ ๋น ๋ฅธ ARM ๊ธฐ๋ฐ์ ์ค์ ์ฒ๋ฆฌ ์ฅ์น ( CPU ) ์ฝ์ด, ๊ทธ๋ฆฌ๊ณ ์ธ๊ณ ์ต๊ณ ์ CPU ์ฑ๋ฅ ๋ ์ํธ๋ฅผ ๊ฐ์ถ๊ณ ์๋ค๊ณ ์ฃผ์ฅํ๊ณ ์๋ค.
|
85 |
+
print('Answer Offset: ', result['answer_offset']) # (159, 246)
|
86 |
```
|
87 |
|
88 |
## Citing & Authors
|