NchuNLP committed on
Commit
c00321a
1 Parent(s): 5adcb05

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +41 -1
README.md CHANGED
@@ -22,7 +22,7 @@ model_name = "NchuNLP/Chinese-Question-Answering"
22
  tokenizer = BertTokenizerFast.from_pretrained(model_name)
23
  model = BertForQuestionAnswering.from_pretrained(model_name)
24
 
25
- # a) Get predictions
26
  nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
27
  QA_input = {
28
  'question': '中興大學在哪裡?',
@@ -30,6 +30,46 @@ QA_input = {
30
  }
31
  res = nlp(QA_input)
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ```
34
 
35
  ## Authors
 
22
  tokenizer = BertTokenizerFast.from_pretrained(model_name)
23
  model = BertForQuestionAnswering.from_pretrained(model_name)
24
 
25
+ # a) Get predictions
26
  nlp = pipeline('question-answering', model=model, tokenizer=tokenizer)
27
  QA_input = {
28
  'question': '中興大學在哪裡?',
 
30
  }
31
  res = nlp(QA_input)
32
 
33
+ # b) Inside the Question answering pipeline
34
+
35
+ inputs = tokenizer(query, text, return_tensors="pt",padding=True, truncation=True, max_length=512, stride=256)
36
+ outputs = model(**inputs)
37
+
38
+ sequence_ids = inputs.sequence_ids()
39
+ # Mask everything apart from the tokens of the context
40
+ mask = [i != 1 for i in sequence_ids]
41
+ # Unmask the [CLS] token
42
+ mask[0] = False
43
+ mask = torch.tensor(mask)[None]
44
+
45
+ start_logits[mask] = -10000
46
+ end_logits[mask] = -10000
47
+
48
+ start_probabilities = torch.nn.functional.softmax(start_logits, dim=-1)[0]
49
+ end_probabilities = torch.nn.functional.softmax(end_logits, dim=-1)[0]
50
+
51
+ scores = start_probabilities[:, None] * end_probabilities[None, :]
52
+
53
+ max_index = scores.argmax().item()
54
+ start_index = max_index // scores.shape[1]
55
+ end_index = max_index % scores.shape[1]
56
+
57
+
58
+ inputs_with_offsets = tokenizer(query, text, return_offsets_mapping=True)
59
+ offsets = inputs_with_offsets["offset_mapping"]
60
+
61
+ start_char, _ = offsets[start_index]
62
+ _, end_char = offsets[end_index]
63
+ answer = text[start_char:end_char]
64
+
65
+ result = {
66
+ "answer": answer,
67
+ "start": start_char,
68
+ "end": end_char,
69
+ "score": scores[start_index, end_index],
70
+ }
71
+ print(result)
72
+
73
  ```
74
 
75
  ## Authors