Shaltiel committed on
Commit
26aea07
•
1 Parent(s): 2cdc088

Upload BertForJointParsing.py

Browse files
Files changed (1) hide show
  1. BertForJointParsing.py +12 -8
BertForJointParsing.py CHANGED
@@ -353,7 +353,6 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
353
  word['lex'] = word['seg'][-1]
354
 
355
  start = len(intermediate_output)
356
- idx_to_key[word_idx] = len(intermediate_output) + 1
357
  # Add in all the prefixes
358
  if len(word['seg']) > 1:
359
  for pre in get_prefixes_from_str(word['seg'][0], greedy=True):
@@ -370,6 +369,7 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
370
  intermediate_output[-1]['feats'] = 'Definite=Def|PronType=Art'
371
 
372
 
 
373
  # add the main word in!
374
  intermediate_output.append(dict(
375
  word=word['seg'][-1], lex=word['lex'], pos=word['morph']['pos'],
@@ -380,9 +380,9 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
380
  if word['morph']['suffix']:
381
  # first determine the dependency info:
382
  # For adp, num, det - the main word points to here, and the suffix points to the dependency
 
383
  if word['morph']['pos'] in ['ADP', 'NUM', 'DET']:
384
- intermediate_output[-1]['dep'] = len(intermediate_output)
385
- intermediate_output[-1]['absolute_dep'] = True
386
  intermediate_output[-1]['func'] = 'case'
387
  dep = word['syntax']['dep_head_idx']
388
  func = word['syntax']['dep_func']
@@ -409,14 +409,18 @@ def convert_output_to_ud(output_sentences, htb_extras=False):
409
  # for htb:
410
  else:
411
  # main word becomes the lexeme, the suffix is based on the features
412
- intermediate_output[-1]['word'] = s_lex + '_'
413
  suf_feats = word['morph']['suffix_feats']
414
- suf = ud_suffix_to_htb_str.get(f"Gender={suf_feats['Gender']}|Number={suf_feats['Number']}|Person={suf_feats['Person']}", "_הוא")
415
  # for HTB, if the function is poss, then add a shel pointing to the next word
416
  if func == 'nmod:poss':
417
- intermediate_output.append(dict(word='_של_', lex='של', pos='ADP', dep=len(intermediate_output) + 1, func='case', feats='_', absolute_dep=True))
418
  # add the main suffix in
419
  intermediate_output.append(dict(word=suf, lex='הוא', pos='PRON', dep=dep, func=func, feats='|'.join(f'{k}={v}' for k,v in word['morph']['suffix_feats'].items())))
 
 
 
 
420
  end = len(intermediate_output)
421
  ranges.append((start, end, word['token']))
422
 
@@ -458,12 +462,12 @@ def ud_get_prefix_dep(pre, word, word_idx):
458
  func = 'mark'
459
  # vuv goes to the main word if the function is in the list, otherwise follows
460
  elif pre == 'ו':
461
- does_follow_main = word['syntax']['dep_func'] in ["conj", "acl:recl", "parataxis", "root", "acl", "amod", "list", "appos", "dep", "flatccomp"]
462
  func = 'cc'
463
  else:
464
  # for adj, noun, propn, pron, verb - prefixes go to the main word
465
  if word['morph']['pos'] in ["ADJ", "NOUN", "PROPN", "PRON", "VERB"]:
466
- does_follow_main = True
467
  # otherwise - prefix follows the word if the function is in the list
468
  else: does_follow_main = word['syntax']['dep_func'] in ["compound:affix", "det", "aux", "nummod", "advmod", "dep", "cop", "mark", "fixed"]
469
 
 
353
  word['lex'] = word['seg'][-1]
354
 
355
  start = len(intermediate_output)
 
356
  # Add in all the prefixes
357
  if len(word['seg']) > 1:
358
  for pre in get_prefixes_from_str(word['seg'][0], greedy=True):
 
369
  intermediate_output[-1]['feats'] = 'Definite=Def|PronType=Art'
370
 
371
 
372
+ idx_to_key[word_idx] = len(intermediate_output) + 1
373
  # add the main word in!
374
  intermediate_output.append(dict(
375
  word=word['seg'][-1], lex=word['lex'], pos=word['morph']['pos'],
 
380
  if word['morph']['suffix']:
381
  # first determine the dependency info:
382
  # For adp, num, det - the main word points to here, and the suffix points to the dependency
383
+ entry_to_assign_suf_dep = None
384
  if word['morph']['pos'] in ['ADP', 'NUM', 'DET']:
385
+ entry_to_assign_suf_dep = intermediate_output[-1]
 
386
  intermediate_output[-1]['func'] = 'case'
387
  dep = word['syntax']['dep_head_idx']
388
  func = word['syntax']['dep_func']
 
409
  # for htb:
410
  else:
411
  # main word becomes the lexeme, the suffix is based on the features
412
+ intermediate_output[-1]['word'] = (s_lex if s_lex != s_word else s_word[:-1]) + '_'
413
  suf_feats = word['morph']['suffix_feats']
414
+ suf = ud_suffix_to_htb_str.get(f"Gender={suf_feats.get('Gender', 'Fem,Masc')}|Number={suf_feats.get('Number', 'Sing')}|Person={suf_feats.get('Person', '3')}", "_הוא")
415
  # for HTB, if the function is poss, then add a shel pointing to the next word
416
  if func == 'nmod:poss':
417
+ intermediate_output.append(dict(word='_של_', lex='של', pos='ADP', dep=len(intermediate_output) + 2, func='case', feats='_', absolute_dep=True))
418
  # add the main suffix in
419
  intermediate_output.append(dict(word=suf, lex='הוא', pos='PRON', dep=dep, func=func, feats='|'.join(f'{k}={v}' for k,v in word['morph']['suffix_feats'].items())))
420
+ if entry_to_assign_suf_dep:
421
+ entry_to_assign_suf_dep['dep'] = len(intermediate_output)
422
+ entry_to_assign_suf_dep['absolute_dep'] = True
423
+
424
  end = len(intermediate_output)
425
  ranges.append((start, end, word['token']))
426
 
 
462
  func = 'mark'
463
  # vuv goes to the main word if the function is in the list, otherwise follows
464
  elif pre == 'ו':
465
+ does_follow_main = word['syntax']['dep_func'] not in ["conj", "acl:recl", "parataxis", "root", "acl", "amod", "list", "appos", "dep", "flatccomp"]
466
  func = 'cc'
467
  else:
468
  # for adj, noun, propn, pron, verb - prefixes go to the main word
469
  if word['morph']['pos'] in ["ADJ", "NOUN", "PROPN", "PRON", "VERB"]:
470
+ does_follow_main = False
471
  # otherwise - prefix follows the word if the function is in the list
472
  else: does_follow_main = word['syntax']['dep_func'] in ["compound:affix", "det", "aux", "nummod", "advmod", "dep", "cop", "mark", "fixed"]
473