xl2533 committed on
Commit
059bf6f
1 Parent(s): d50fd60
Files changed (1) hide show
  1. ape/instance.py +9 -4
ape/instance.py CHANGED
@@ -80,9 +80,12 @@ def load_qa(file='./ape/data/qa_train.json'):
80
  raw_data = json.load(open(file, encoding='UTF8'))
81
  for i in raw_data:
82
  input = i['text']
83
- #只取一个QA不然容易超出模型输入长度
84
- output = {'问题': i['annotations'][0]["Q"], '回答': i['annotations'][0]["A"]}
85
- output = json.dumps(output, ensure_ascii=False)
 
 
 
86
  data.append((input, output))
87
  return data
88
 
@@ -103,7 +106,9 @@ def load_entity(file='./ape/data/entity_train.json'):
103
  input = i['text']
104
  output = []
105
  for j in i['labels']:
106
- output.append({'类型': j[1], '实体': j[-1]})
 
 
107
  output = json.dumps(output, ensure_ascii=False)
108
  data.append((input, output))
109
  return data
 
80
  raw_data = json.load(open(file, encoding='UTF8'))
81
  for i in raw_data:
82
  input = i['text']
83
+ # 只取一个QA不然容易超出模型输入长度'
84
+ output = []
85
+ for j in i['annotations']:
86
+ output.append(json.dumps({'问题': j["Q"], '回答': j["A"]}, ensure_ascii=False))
87
+ output = sorted(output, key=lambda x: len(x))
88
+ output = output[0]
89
  data.append((input, output))
90
  return data
91
 
 
106
  input = i['text']
107
  output = []
108
  for j in i['labels']:
109
+ ##筛选局部实体类型,也可以拆分成单个实体类型
110
+ if j[1] in ['DRUG_DOSAGE', 'DRUG_TASTE', 'DRUG_EFFICACY']:
111
+ output.append({'类型': j[1], '实体': j[-1]})
112
  output = json.dumps(output, ensure_ascii=False)
113
  data.append((input, output))
114
  return data