chen666-666 committed on
Commit
b910ef3
·
verified ·
1 Parent(s): b78acbf

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -225,25 +225,32 @@ def ner(text, model_type="bert"):
225
 
226
 
227
  # ======================== 关系抽取(RE) ========================
228
- def re_extract(entities, text):
229
  # 参数校验
230
  if not entities or not text:
 
231
  return []
232
 
233
  # 实体类型过滤(根据业务需求调整)
234
  valid_entity_types = {"PER", "LOC", "ORG", "TITLE"}
235
  filtered_entities = [e for e in entities if e.get("type") in valid_entity_types]
236
 
 
 
 
 
237
  # --------------------- 处理单实体场景 ---------------------
238
  if len(filtered_entities) == 1:
239
  single_relations = []
240
  ent = filtered_entities[0]
 
241
 
242
  # 规则1:人物职位检测
243
  if ent["type"] == "PER":
244
  position_keywords = ["CEO", "经理", "总监", "工程师", "教授"]
245
  for keyword in position_keywords:
246
  if keyword in text:
 
247
  single_relations.append({
248
  "head": ent["text"],
249
  "tail": keyword,
@@ -258,6 +265,7 @@ def re_extract(entities, text):
258
  if verb in text:
259
  match = re.search(fr"{ent['text']}{verb}(.*?)[,。]", text)
260
  if match:
 
261
  single_relations.append({
262
  "head": ent["text"],
263
  "tail": match.group(1).strip(),
@@ -269,9 +277,10 @@ def re_extract(entities, text):
269
  # --------------------- 多实体关系抽取 ---------------------
270
  relations = []
271
 
272
- # 方案1:使用ChatGLM抽取关系
273
- if use_chatglm and len(filtered_entities) >= 2:
274
  try:
 
275
  entity_list = [e["text"] for e in filtered_entities]
276
  prompt = f"""请分析以下文本中的实体关系,严格按照JSON列表格式返回:
277
  文本内容:{text}
@@ -282,9 +291,8 @@ def re_extract(entities, text):
282
  3. 示例格式:[{{"head":"实体1", "tail":"实体2", "relation":"关系类型"}}]
283
  请直接返回JSON,不要多余内容:"""
284
 
285
- response = chatglm_model.chat(chatglm_tokenizer, prompt, temperature=0.01)
286
- if isinstance(response, tuple):
287
- response = response[0]
288
 
289
  # 增强JSON解析
290
  try:
@@ -305,11 +313,13 @@ def re_extract(entities, text):
305
  rel.get("relation") in valid_rel_types):
306
  valid_relations.append(rel)
307
  relations = valid_relations
 
 
308
  except Exception as e:
309
  print(f"[DEBUG] 关系解析失败: {str(e)}")
310
 
311
  except Exception as e:
312
- print(f"ChatGLM关系抽取异常: {str(e)}")
313
 
314
  # 方案2:规则兜底(当模型不可用或未抽取出关系时)
315
  if len(relations) == 0:
@@ -317,18 +327,21 @@ def re_extract(entities, text):
317
  location_matches = re.finditer(r'([^\s,。]+)[位于|坐落于|地处]([^\s,。]+)', text)
318
  for match in location_matches:
319
  head, tail = match.groups()
 
320
  relations.append({"head": head, "tail": tail, "relation": "位于"})
321
 
322
  # 规则2:A属于B
323
  belong_matches = re.finditer(r'([^\s,。]+)(属于|隶属于)([^\s,。]+)', text)
324
  for match in belong_matches:
325
  head, _, tail = match.groups()
 
326
  relations.append({"head": head, "tail": tail, "relation": "属于"})
327
 
328
  # 规则3:人物-机构关系
329
  person_org_pattern = r'([\u4e00-\u9fa5]{2,4})(现任|担任|就职于)([\u4e00-\u9fa5]+?公司|[\u4e00-\u9fa5]+?大学)'
330
  for match in re.finditer(person_org_pattern, text):
331
  head, _, tail = match.groups()
 
332
  relations.append({"head": head, "tail": tail, "relation": "任职于"})
333
 
334
  # 后处理:去重和验证
@@ -341,8 +354,11 @@ def re_extract(entities, text):
341
  head_exists = any(e["text"] == rel["head"] for e in filtered_entities)
342
  tail_exists = any(e["text"] == rel["tail"] for e in filtered_entities)
343
  if head_exists and tail_exists:
 
344
  final_relations.append(rel)
345
  seen.add(key)
 
 
346
 
347
  return final_relations
348
 
 
225
 
226
 
227
  # ======================== 关系抽取(RE) ========================
228
+ def re_extract(entities, text, use_bert_model=True, bert_model=None):
229
  # 参数校验
230
  if not entities or not text:
231
+ print("[DEBUG] 参数校验失败,实体或文本为空")
232
  return []
233
 
234
  # 实体类型过滤(根据业务需求调整)
235
  valid_entity_types = {"PER", "LOC", "ORG", "TITLE"}
236
  filtered_entities = [e for e in entities if e.get("type") in valid_entity_types]
237
 
238
+ if not filtered_entities:
239
+ print("[DEBUG] 未找到有效的实体")
240
+ return []
241
+
242
  # --------------------- 处理单实体场景 ---------------------
243
  if len(filtered_entities) == 1:
244
  single_relations = []
245
  ent = filtered_entities[0]
246
+ print(f"[DEBUG] 处理单实体:{ent['text']},类型:{ent['type']}")
247
 
248
  # 规则1:人物职位检测
249
  if ent["type"] == "PER":
250
  position_keywords = ["CEO", "经理", "总监", "工程师", "教授"]
251
  for keyword in position_keywords:
252
  if keyword in text:
253
+ print(f"[DEBUG] 发现职位关键词:{keyword}")
254
  single_relations.append({
255
  "head": ent["text"],
256
  "tail": keyword,
 
265
  if verb in text:
266
  match = re.search(fr"{ent['text']}{verb}(.*?)[,。]", text)
267
  if match:
268
+ print(f"[DEBUG] 发现位置关系:{ent['text']} {verb} {match.group(1)}")
269
  single_relations.append({
270
  "head": ent["text"],
271
  "tail": match.group(1).strip(),
 
277
  # --------------------- 多实体关系抽取 ---------------------
278
  relations = []
279
 
280
+ # 方案1:使用BERT模型进行关系抽取
281
+ if use_bert_model and len(filtered_entities) >= 2:
282
  try:
283
+ # 假设 BERT 模型是基于你自己训练的模型进行关系抽取
284
  entity_list = [e["text"] for e in filtered_entities]
285
  prompt = f"""请分析以下文本中的实体关系,严格按照JSON列表格式返回:
286
  文本内容:{text}
 
291
  3. 示例格式:[{{"head":"实体1", "tail":"实体2", "relation":"关系类型"}}]
292
  请直接返回JSON,不要多余内容:"""
293
 
294
+ # 使用BERT模型进行关系抽取(这里假设模型函数是 `bert_model.predict`,具体调用方式按你模型接口调整)
295
+ response = bert_model.predict(prompt)
 
296
 
297
  # 增强JSON解析
298
  try:
 
313
  rel.get("relation") in valid_rel_types):
314
  valid_relations.append(rel)
315
  relations = valid_relations
316
+ else:
317
+ print("[DEBUG] 未能解析出关系JSON")
318
  except Exception as e:
319
  print(f"[DEBUG] 关系解析失败: {str(e)}")
320
 
321
  except Exception as e:
322
+ print(f"[DEBUG] BERT模型关系抽取异常: {str(e)}")
323
 
324
  # 方案2:规则兜底(当模型不可用或未抽取出关系时)
325
  if len(relations) == 0:
 
327
  location_matches = re.finditer(r'([^\s,。]+)[位于|坐落于|地处]([^\s,。]+)', text)
328
  for match in location_matches:
329
  head, tail = match.groups()
330
+ print(f"[DEBUG] 发现位于关系:{head} 位于 {tail}")
331
  relations.append({"head": head, "tail": tail, "relation": "位于"})
332
 
333
  # 规则2:A属于B
334
  belong_matches = re.finditer(r'([^\s,。]+)(属于|隶属于)([^\s,。]+)', text)
335
  for match in belong_matches:
336
  head, _, tail = match.groups()
337
+ print(f"[DEBUG] 发现属于关系:{head} 属于 {tail}")
338
  relations.append({"head": head, "tail": tail, "relation": "属于"})
339
 
340
  # 规则3:人物-机构关系
341
  person_org_pattern = r'([\u4e00-\u9fa5]{2,4})(现任|担任|就职于)([\u4e00-\u9fa5]+?公司|[\u4e00-\u9fa5]+?大学)'
342
  for match in re.finditer(person_org_pattern, text):
343
  head, _, tail = match.groups()
344
+ print(f"[DEBUG] 发现人物职位关系:{head} {tail}")
345
  relations.append({"head": head, "tail": tail, "relation": "任职于"})
346
 
347
  # 后处理:去重和验证
 
354
  head_exists = any(e["text"] == rel["head"] for e in filtered_entities)
355
  tail_exists = any(e["text"] == rel["tail"] for e in filtered_entities)
356
  if head_exists and tail_exists:
357
+ print(f"[DEBUG] 添加有效关系:{rel}")
358
  final_relations.append(rel)
359
  seen.add(key)
360
+ else:
361
+ print(f"[DEBUG] 无效关系:{rel}")
362
 
363
  return final_relations
364