KonradSzafer committed on
Commit
34ac5d3
1 Parent(s): 0121498

answer post-processing improvements

Browse files
Files changed (1) hide show
  1. qa_engine/qa_engine.py +8 -2
qa_engine/qa_engine.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import json
3
  import requests
4
  import subprocess
@@ -226,16 +227,21 @@ class QAEngine():
226
  '''
227
  SEQUENCES_TO_REMOVE = [
228
  'Factually: ', 'Answer: ', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]',
229
- '<context>', '<\context>', '<question>', '<\question>',
230
  ]
231
  SEQUENCES_TO_STOP = [
232
  'User:', 'You:', 'Question:'
233
  ]
 
 
 
234
  for seq in SEQUENCES_TO_REMOVE:
235
  answer = answer.replace(seq, '')
236
  for seq in SEQUENCES_TO_STOP:
237
  if seq in answer:
238
- answer = answer[:answer.index(seq)]
 
 
239
  answer = answer.strip()
240
  return answer
241
 
 
1
  import os
2
+ import re
3
  import json
4
  import requests
5
  import subprocess
 
227
  '''
228
  SEQUENCES_TO_REMOVE = [
229
  'Factually: ', 'Answer: ', '<<SYS>>', '<</SYS>>', '[INST]', '[/INST]',
230
+ '<context>', '</context>', '<question>', '</question>',
231
  ]
232
  SEQUENCES_TO_STOP = [
233
  'User:', 'You:', 'Question:'
234
  ]
235
+ CHARS_TO_DEDUPLICATE = [
236
+ '\n', '\t', ' '
237
+ ]
238
  for seq in SEQUENCES_TO_REMOVE:
239
  answer = answer.replace(seq, '')
240
  for seq in SEQUENCES_TO_STOP:
241
  if seq in answer:
242
+ answer = answer[:answer.index(seq)]
243
+ for char in CHARS_TO_DEDUPLICATE:
244
+ answer = re.sub(f'{char}+', f'{char}', answer)
245
  answer = answer.strip()
246
  return answer
247