Spaces:

egumasa
/

engagement-analyzer-demo

Running

App Files Files Community

engagement-analyzer-demo / pipeline /post_processors.py

egumasa

new requirements

9e3e64a 12 months ago

raw history blame

No virus

37.4 kB


	from typing import List, Sequence, Tuple, Optional, Dict, Union, Callable
	import pandas as pd
	import spacy
	from spacy.language import Language
	from skbio import diversity as dv

	# Span attribute names read with getattr() for every span; this list is the
	# default ``attrs`` argument of simple_table() and const_table().
	SPAN_ATTRS = ["text", "label_", "start", "end"]
	# Ten upper-case label names.  Not referenced in the visible part of this
	# file -- presumably the span label inventory; verify against the model.
	CATEGORIES = ['ATTRIBUTION', "CITATION", "COUNTER", "DENY", "ENDOPHORIC", "ENTERTAIN", "JUSTIFYING", "MONOGLOSS", "PROCLAIM", "SOURCES"]


	def simple_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
	                 spans_key: str = "sc",
	                 attrs: List[str] = SPAN_ATTRS):
	    """Tabulate the spans stored under ``doc.spans[spans_key]``.

	    Each row holds the stringified value of every attribute in ``attrs``
	    followed by the matching entry of the span group's ``attrs['scores']``.

	    Args:
	        doc: Object exposing ``spans[spans_key]``; the body only uses the
	            ``.spans`` interface.
	        spans_key: Key of the span group to read.
	        attrs: Attribute names fetched from each span with ``getattr``.

	    Returns:
	        tuple: ``(rows, columns)`` where ``columns`` is ``attrs`` plus
	        ``"Conf. score"``.
	    """
	    columns = attrs + ["Conf. score"]
	    span_group = doc.spans[spans_key]
	    data = []
	    # zip() stops at the shorter of spans / scores
	    for span, score in zip(span_group, span_group.attrs['scores']):
	        row = [str(getattr(span, name)) for name in attrs]
	        row.append(score)  # raw score, not formatted
	        data.append(row)
	    return data, columns


	# def span_info_aggregator()

	def construction_classifier(doc, span):
	    """Return a grammatical-construction label for ``span``.

	    The label is derived from the dependency relation, POS/tag, lemma and
	    morphology of the span's root token, of that root's children and of the
	    tokens inside the span.  Rules run top to bottom and a later rule
	    overwrites an earlier one, so statement order is significant.

	    Fixes in this revision: participle morphology strings use a plain ``|``
	    separator (they were written with an escaped pipe and could never equal
	    ``str(token.morph)``); the lexical-verb branch no longer tests a boolean
	    for membership in ``c_dep`` (always false); a leftover debugging
	    ``print`` is gone.

	    NOTE(review): the nesting of a few ``elif``/``else`` branches (marked
	    below) was reconstructed from a listing that had lost its indentation --
	    confirm against the upstream file.

	    Args:
	        doc: Unused; kept so existing callers keep working.
	        span: Span-like object exposing ``root``, ``label_``, ``end``,
	            ``start`` and iteration over its tokens.

	    Returns:
	        str: The construction label, or the root token's ``dep_`` when no
	        rule matched.
	    """
	    subject_deps = ['nsubj', 'nsubjpass', 'csubj', 'csubjpass']
	    argument_deps = subject_deps + ["dobj", 'ccomp', 'xcomp', 'dative', "attr", "oprd", "acomp"]
	    verbal_pos = ['VERB', "AUX"]
	    # String form of the morphological analysis of present / past participles.
	    pres_part = "Aspect=Prog|Tense=Pres|VerbForm=Part"
	    past_part = "Aspect=Perf|Tense=Past|VerbForm=Part"

	    category = None
	    spanroot = span.root

	    ## Lexico-grammatical information about the tokens inside the span
	    span_t_dep_ = ["_".join([t.norm_, t.dep_]) for t in span]
	    span_dep = [t.dep_ for t in span]
	    span_tag = [t.tag_ for t in span]

	    ## ... and about the children of the span's own root.  These are taken
	    ## before the conj walk below, so they always describe span.root.
	    children = list(spanroot.children)
	    c_t_dep_ = ["_".join([t.norm_, t.dep_]) for t in children]
	    c_norm = [t.norm_ for t in children]
	    c_dep = [t.dep_ for t in children]
	    c_pos = [t.pos_ for t in children]
	    c_tag = [t.tag_ for t in children]
	    right_dep = [t.dep_ for t in spanroot.rights]

	    # conditionals
	    subjless = all(t.dep_ not in subject_deps for t in children)
	    argmentless = all(t.dep_ not in argument_deps for t in children)

	    ## nesting classifiers: a conjunct is classified like the first
	    ## non-conj head of its coordination
	    while spanroot.dep_ == 'conj':
	        spanroot = spanroot.head

	    ## Conjunctions
	    # Preconjunctions
	    if spanroot.dep_ in ['preconj', 'cc']:
	        category = "Conjunction"

	    ## NOUN PHRASES
	    # adjectival modifiers
	    if spanroot.dep_ in ['amod']:
	        category = "Adjectival modifier"
	    # compound nouns
	    if spanroot.dep_ in ['compound']:
	        category = "Compound noun"

	    ## Nominal category
	    if spanroot.dep_ in ["pobj", "dobj", "obj", "iobj", "dative"]:
	        if "acl" in c_dep:
	            category = "Noun + Complement (Object)"
	        else:
	            category = "Object"

	    if spanroot.dep_ in ["nsubj", "nsubjpass"]:
	        if "acl" in c_dep:
	            category = "Noun + Complement (Subject)"
	        else:
	            category = "Subject"

	    ## ADJUNCTS
	    # prep phrases
	    if spanroot.dep_ in ['prep', 'agent']:
	        category = 'Prepositional phrase'
	    # adverbial phrases
	    if spanroot.dep_ in ['advmod', "npadvmod", "nmod", "npmod", 'quantmod']:
	        category = "Adverbial phrase"

	    ## Predication patterns
	    if spanroot.dep_ in ['acomp', 'oprd']:
	        if "xcomp" in c_dep:
	            category = "Subject predicate to-cl"
	        else:
	            category = "Adjectival complement"

	    if spanroot.dep_ in ['attr']:
	        # recomputed on the (possibly conj-walked) root
	        subjless = all(t.dep_ not in subject_deps for t in spanroot.children)

	        c_head = [t.dep_ for t in spanroot.head.children]
	        if "expl" in c_head and "no_det" in span_t_dep_:
	            category = "There is/are no NOUN"
	        elif "expl" in c_head and spanroot.pos_ in ["NOUN"]:
	            category = "There is/are + Noun complement"
	        elif "expl" in c_head and spanroot.tag_ in ["NN", "NNS"]:
	            category = "There is/are + Noun complement"

	        elif spanroot.pos_ in ["NOUN", "PRON"]:
	            if "acl" in c_dep:
	                category = "Noun + Complement (attr)"
	            else:
	                category = "Nominal complement"

	        elif not subjless and spanroot.pos_ in verbal_pos:
	            category = "Main verb 4"

	        elif spanroot.tag_ in ['NNP']:
	            category = "Nominal complement"

	    ####################################
	    ###           clausal           ####
	    ####################################
	    if spanroot.dep_ in ["ROOT", "advcl", "ccomp", 'acl', 'pcomp', 'relcl']:
	        subtree = list(spanroot.subtree)

	        # infinitival "to" / progressive form attached to an xcomp in the subtree
	        _check_to = [t.dep_ for t in subtree if (t.dep_ in ["aux"] and t.pos_ in ["PART", "SCONJ"]) and t.head.dep_ == "xcomp"]
	        _check_ing = [t.dep_ for t in subtree if "Prog" in str(t.morph) and t.dep_ == "xcomp"]
	        # True for every ccomp child located after the root
	        root_before_ccomp = [t.i > spanroot.i for t in spanroot.children if t.dep_ == "ccomp"]

	        _check_for_to = ["_".join([t.norm_, t.dep_]) for t in subtree if t.head.dep_ == "advcl" and (t.dep_ == "mark" or t.dep_ == "aux")]

	        ## Start with broad category, which is then re-evaluated for specific constructions.
	        if spanroot.dep_ in ['advcl', 'mark', 'acl', 'pcomp']:
	            ## Adverbial clauses (finite and non-finite)
	            subjless = all(t.dep_ not in subject_deps for t in spanroot.children)
	            # NOTE(review): compares the inclusive right_edge index with the
	            # exclusive span.end -- kept as written; confirm intent.
	            entire_cl = spanroot.left_edge.i == span.start and spanroot.right_edge.i == span.end

	            if "mark" in span_dep and spanroot.pos_ in verbal_pos:
	                category = "Finite adverbial clause"
	            elif "mark" in span_dep and "aux" in span_dep:
	                category = "Finite adverbial clause"

	            elif "mark" in span_dep and spanroot.pos_ in verbal_pos and "expl" in c_dep:
	                category = "Finite adverbial clause"

	            elif "advmod" in span_dep and ("WRB" in span_tag or "WDT" in span_tag):
	                # NOTE(review): the two elifs below are read as part of this
	                # inner wh-chain -- confirm nesting against upstream.
	                if spanroot.pos_ in verbal_pos:
	                    category = "Finite adverbial clause"

	                elif spanroot.pos_ not in verbal_pos and subjless:
	                    category = "Non-finite adv clause 1"

	                elif entire_cl:
	                    category = "Finite adverbial clause"

	            elif str(spanroot.morph) in [pres_part, past_part] and "aux" not in c_dep:
	                # he doing his job
	                if argmentless:
	                    # e.g., frankly speaking, strictly speaking
	                    category = "Adverbial Phrase"
	                else:
	                    category = "Non-finite adv clause 2"

	            elif spanroot.pos_ not in verbal_pos and "mark" in span_dep and subjless:
	                category = "Non-finite adv clause 3"

	            elif "aux" in c_dep and "TO" in c_tag:
	                category = "Adverbial Phrase"

	            elif "mark" not in span_dep and spanroot.pos_ in verbal_pos:
	                category = "Dependent Verb phrase"

	            elif not argmentless:
	                category = "Adverbial clause"

	            elif spanroot.dep_ == "advcl":
	                category = "Adverbial phrase"

	        if spanroot.dep_ in ['relcl', 'ccomp', 'acl']:

	            head = spanroot.head
	            if ";" in [t.norm_ for t in head.children]:
	                category = "Main verb 3"
	            elif "nsubj" not in span_dep:
	                category = "Dependent verb 1"
	            elif "mark" in span_dep:
	                category = "Complement clause"
	            elif str(spanroot.morph) in [pres_part, past_part] and "aux" not in c_dep:
	                category = "Non-finite complement clause"
	            elif spanroot.dep_ in ['relcl']:
	                category = "Relative clause"
	            elif spanroot.dep_ in ['ccomp']:
	                category = "Complement clause"
	            elif spanroot.dep_ in ['acl']:
	                category = "Noun Complement clause"
	            else:
	                category = "this one"

	        ## Specific constructions
	        # Extraposed that-clause or to-infinitives
	        if ("it_nsubjpass" in c_t_dep_ or "it_nsubj" in c_t_dep_) and spanroot.pos_ in ["VERB", "AUX"]:
	            if ("acomp" in c_dep or "oprd" in c_dep) and "ccomp" in c_dep:
	                # eg it seems odd (oprd) that X.
	                # eg it is certain (acomp) that X.
	                category = "Extraposed that-cl (adj-complement)"

	            elif "xcomp" in c_dep or ("advcl" in c_dep):
	                if "for_mark" in _check_for_to:
	                    category = "Extraposed to-cl (explicit subj)"
	                elif _check_to:
	                    category = "Extraposed to-cl 1"  # eg It is possible to .
	                elif _check_ing:
	                    category = "Extraposed -ing 1"

	            # NOTE(review): the next three branches are read as siblings of
	            # the xcomp/advcl test above -- confirm nesting against upstream.
	            elif ("prep" in right_dep or "npadvmod" in right_dep) and "ccomp" in right_dep and spanroot.lemma_ == "be":
	                category = "Cleft construction"

	            elif "attr" in c_dep:
	                category = "Extraposed that-cl (copula)"  # eg It is a wonder that X.

	            else:
	                category = "Extraposed that-cl (VERB)"

	        elif ("it_nsubjpass" in c_t_dep_ or "it_nsubj" in c_t_dep_) and "acomp" in c_dep:
	            if "xcomp" in c_dep:
	                if _check_to:
	                    category = "Extraposed to-cl 2"  # eg it is difficult to decide.
	                elif _check_ing:
	                    category = "Extraposed -ing 2"

	            else:
	                category = "Extraposed that-cl (adj-complement) 2"

	        elif ("it_nsubjpass" in c_t_dep_ or "it_nsubj" in c_t_dep_) and "oprd" in c_dep:

	            category = "Extraposed that-cl (adj-complement) 3"  # eg it seems odd that X.

	        # something without dummy subject "it"
	        elif (("nsubj" in c_dep and spanroot.lemma_ in ['be']) or "nsubjpass" in c_dep) and spanroot.pos_ in ["AUX", 'VERB'] and "it" not in c_norm:

	            # xcomp / ccomp tokens whose head is an adjectival complement
	            _check_xcomp = [t.dep_ for t in subtree if t.dep_ in ["xcomp"] and t.head.dep_ == "acomp"]
	            _check_ccomp = [t.dep_ for t in subtree if t.dep_ in ["ccomp"] and t.head.dep_ == "acomp"]

	            if ("attr" in c_dep or "acomp" in c_dep) and "ccomp" in c_dep:
	                if any(root_before_ccomp):
	                    category = "Post-predicate that-cl"
	                else:
	                    category = "Comment clause"

	            elif ("attr" in c_dep or "acomp" in c_dep) and "ccomp" in _check_ccomp:
	                category = "Post-predicate that-cl 2"

	            elif ("attr" in c_dep or "acomp" in c_dep) and "xcomp" in _check_xcomp:
	                category = "Post-predicate to-cl"

	            elif "xcomp" in c_dep and spanroot.lemma_ in ['be'] and _check_to:
	                category = "Subject predicate to-cl"

	            elif "xcomp" in c_dep and "auxpass" in c_dep and _check_to:
	                category = "Subject predicate to-cl (passive)"

	            elif "xcomp" in c_dep and spanroot.lemma_ in ['be'] and _check_ing:
	                category = "Subject predicate -ing"
	            elif "ccomp" in c_dep:
	                category = "Subject predicate that-cl"
	            elif "acomp" in c_dep:
	                category = "Adjectival predicate"

	            elif "mark" in c_dep and ("nsubj" in c_dep or "nsubjpass" in c_dep):
	                category = "Finite-adverbial clause"
	            else:
	                category = "Main verb 1"

	        ## without dummy subject it, and lexical verbs
	        # (the original condition had a stray ``in c_dep`` that made it always false)
	        elif ("nsubj" in c_dep or "nsubjpass" in c_dep) and spanroot.pos_ in ["AUX", 'VERB'] and "it" not in c_norm and spanroot.lemma_ not in ['be']:
	            _check_wh = [t.dep_ for t in subtree if (t.dep_ in ["attr", "advmod", 'dobj', 'nsubj'] and t.tag_ in ["WP", "WRB", "WDT", "WP$"]) and t.head.dep_ == "ccomp"]
	            _check_if = [t.dep_ for t in subtree if (t.dep_ in ["mark"] and t.norm_ in ["whether", "if"]) and t.head.dep_ == "ccomp"]

	            if "ccomp" in c_dep and (_check_wh or _check_if):
	                category = "Post-predicate wh-cl"

	            elif "ccomp" in c_dep:
	                if any(root_before_ccomp):
	                    category = "Post-predicate that-cl"
	                else:
	                    category = "Comment clause"

	            elif "xcomp" in c_dep:
	                if _check_to:
	                    category = "Post-predicate to-cl"
	                elif _check_ing:
	                    category = "Post-predicate -ing"

	        # Existential
	        elif "expl" in c_dep and "NOUN" in c_pos and "mark" not in c_dep:
	            category = "There is/are NOUN"

	        elif "ccomp" in c_dep and "it_nsubj" in span_t_dep_ and spanroot.pos_ in ["AUX"]:
	            category = "Cleft construction"

	    if spanroot.dep_ in ['parataxis']:
	        if "_".join(span_dep) in ["nsubj_parataxis", "aux_parataxis", "nsubj_aux_parataxis"]:
	            category = "Comment clause"
	        else:
	            category = "parataxis (for now)"

	    ## External comp
	    if spanroot.dep_ in ['xcomp']:
	        if spanroot.head.pos_ == 'ADJ' and "to_aux" in c_t_dep_:
	            category = "Adjective complement to-cl"
	        if spanroot.head.pos_ == 'VERB' and "to_aux" in c_t_dep_:
	            category = "Verb complement to-cl"

	    if spanroot.dep_ in ['pcomp']:
	        if str(spanroot.morph) in [pres_part] and 'ccomp' in c_dep:
	            category = "Participle + that-cl"
	        elif str(spanroot.morph) in [pres_part]:
	            category = "Participle"

	    ## Simple classifier
	    if spanroot.dep_ in ['neg']:
	        category = "Negative particle"
	    if spanroot.dep_ in ['aux', 'auxpass']:
	        category = "Auxiliary"

	    # Modal verbs
	    if spanroot.tag_ == "MD":
	        category = "Modal auxiliary"

	    if spanroot.dep_ in ['dep', "csubj", 'csubjpass']:
	        if spanroot.head.dep_ in ['ROOT', 'ccomp'] and spanroot.head.pos_ in ['AUX', 'VERB'] and spanroot.pos_ in ['AUX', 'VERB']:
	            if spanroot.morph == spanroot.head.morph:
	                category = "Main verb 4"
	            else:
	                category = "Dependent verb 2"
	        elif str(spanroot.morph) == pres_part:
	            category = "Gerund"
	        elif spanroot.head.dep_ in ['conj', 'acl', 'relcl']:
	            if spanroot.morph == spanroot.head.morph:
	                category = "Main verb 4"
	            else:
	                category = "Dependent verb 2"
	        elif "VerbForm=Fin" in str(spanroot.morph):
	            category = "Dependent verb 2"

	    # Appositive phrases
	    if spanroot.dep_ in ['appos']:
	        if "nummod" in c_dep:
	            category = "Apposition"
	        elif spanroot.pos_ in ["PROPN"]:
	            category = "Appositive Proper Nouns"
	        elif spanroot.pos_ in ["NOUN"]:
	            category = "Appositive Noun Phrase"
	        elif spanroot.pos_ in ["VERB", "AUX"]:
	            _check = any(t.dep_ in subject_deps for t in spanroot.children)
	            if _check:
	                category = "Appositive Finite-clause"

	    if spanroot.dep_ in ['appos', "dep", "attr"]:
	        if not subjless and spanroot.pos_ in verbal_pos:
	            category = "Main verb 5"

	    if spanroot.dep_ in ["dep", "mark"]:
	        if spanroot.tag_ in ["RB", "IN", "CC"]:
	            category = "Conjunction"

	    # sometimes the extra-clausal links are not accurate
	    # NOTE(review): this overrides any label assigned above for these
	    # relations (construction_classifier2 guards it with ``category == None``);
	    # kept as written.
	    if spanroot.dep_ in ['aux', "auxpass", 'oprd', 'appos', "xcomp"]:
	        if spanroot.head.dep_ == "ROOT":
	            category = "Main verb"
	        else:
	            category = "dependent verb 5"

	    if span.label_ == "CITATION":
	        if "NNP" in span_tag or "NNPS" in span_tag:
	            if span_dep[0] == 'punct' and span_dep[-1] == 'punct':
	                category = "Parenthetical Citation"
	            elif span_tag[0] in ["NNP", "NNPS"]:
	                category = "Narrative Citation"
	            else:
	                # NOTE(review): ``else`` read as closing the inner chain.
	                category = "Other Citation"

	    # Fall back to the raw dependency label when nothing matched.
	    if category is None:
	        category = spanroot.dep_

	    return category


	def construction_classifier2(doc, span):
	    """Return a grammatical-construction label for ``span`` (second rule set).

	    Same approach as ``construction_classifier``: the label comes from the
	    dependency relation, POS/tag, lemma and morphology of the span's root,
	    the root's children and the tokens in the span.  Rules run top to bottom
	    and later rules overwrite earlier ones, except the final "extra-clausal"
	    fallback which only fires when nothing has matched yet.

	    Fixes in this revision: participle morphology strings use a plain ``|``
	    separator (they were written with an escaped pipe and could never equal
	    ``str(token.morph)``); the lexical-verb branch no longer tests a boolean
	    for membership in ``c_dep`` (always false); duplicated / unused locals
	    were removed.

	    NOTE(review): the nesting of a few ``elif``/``else`` branches (marked
	    below) was reconstructed from a listing that had lost its indentation --
	    confirm against the upstream file.

	    Args:
	        doc: Unused; kept so existing callers keep working.
	        span: Span-like object exposing ``root``, ``label_`` and iteration
	            over its tokens.

	    Returns:
	        str: The construction label, or the root token's ``dep_`` when no
	        rule matched.
	    """
	    subject_deps = ['nsubj', 'nsubjpass', 'csubj', 'csubjpass']
	    argument_deps = subject_deps + ["dobj", 'ccomp', 'xcomp', 'dative', "attr", "oprd", "acomp"]
	    verbal_pos = ['VERB', "AUX"]
	    # String form of the morphological analysis of present / past participles.
	    pres_part = "Aspect=Prog|Tense=Pres|VerbForm=Part"
	    past_part = "Aspect=Perf|Tense=Past|VerbForm=Part"

	    category = None
	    spanroot = span.root

	    ## Lexico-grammatical information about the tokens inside the span
	    span_t_dep_ = ["_".join([t.norm_, t.dep_]) for t in span]
	    span_dep = [t.dep_ for t in span]
	    span_tag = [t.tag_ for t in span]

	    ## ... and about the children of the span's own root.  These are taken
	    ## before the conj walk below, so they always describe span.root.
	    children = list(spanroot.children)
	    c_t_dep_ = ["_".join([t.norm_, t.dep_]) for t in children]
	    c_norm = [t.norm_ for t in children]
	    c_dep = [t.dep_ for t in children]
	    c_pos = [t.pos_ for t in children]
	    c_tag = [t.tag_ for t in children]
	    right_dep = [t.dep_ for t in spanroot.rights]

	    # conditionals
	    subjless = all(t.dep_ not in subject_deps for t in children)
	    argmentless = all(t.dep_ not in argument_deps for t in children)

	    ## nesting classifiers: a conjunct is classified like the first
	    ## non-conj head of its coordination
	    while spanroot.dep_ == 'conj':
	        spanroot = spanroot.head

	    if spanroot.dep_ == "poss":
	        head = spanroot.head
	        if head.dep_ in ["pobj", "dobj", "obj", "iobj", "dative"]:
	            category = "Posessive Noun (Object)"
	        elif head.dep_ in ["nsubj", "nsubjpass"]:
	            category = "Posessive Noun (Subject)"
	        else:
	            category = "Posessive Noun (Other)"

	    ## Conjunctions
	    # Preconjunctions
	    if spanroot.dep_ in ['preconj', 'cc']:
	        category = "Conjunction"

	    ## NOUN PHRASES
	    # adjectival modifiers
	    if spanroot.dep_ in ['amod']:
	        category = "Adjectival modifier"
	    # compound nouns
	    if spanroot.dep_ in ['compound']:
	        category = "Compound noun"

	    ## Nominal category
	    if spanroot.dep_ in ["pobj", "dobj", "obj", "iobj", "dative"]:
	        if "acl" in c_dep:
	            category = "Noun + Complement (Object)"
	        else:
	            category = "Object"

	    if spanroot.dep_ in ["nsubj", "nsubjpass"]:
	        if "acl" in c_dep:
	            category = "Noun + Complement (Subject)"
	        else:
	            category = "Subject"

	    ## ADJUNCTS
	    # prep phrases
	    if spanroot.dep_ in ['prep', 'agent']:
	        category = 'Prepositional phrase'

	    # adverbial phrases
	    if spanroot.dep_ in ['advmod', "npadvmod", "nmod", "npmod", 'quantmod', 'nummod']:
	        category = "Adverbial phrase"

	    ## Predication patterns
	    if spanroot.dep_ in ['acomp', 'oprd']:
	        if "xcomp" in c_dep:
	            category = "Subject predicate to-cl"
	        else:
	            category = "Adjectival complement"

	    if spanroot.dep_ in ['attr']:
	        # recomputed on the (possibly conj-walked) root
	        subjless = all(t.dep_ not in subject_deps for t in spanroot.children)

	        c_head = [t.dep_ for t in spanroot.head.children]
	        if "expl" in c_head and "no_det" in span_t_dep_:
	            category = "There is/are no NOUN"
	        elif "expl" in c_head and spanroot.pos_ in ["NOUN"]:
	            category = "There is/are + Noun complement"
	        elif "expl" in c_head and spanroot.tag_ in ["NN", "NNS"]:
	            category = "There is/are + Noun complement"

	        elif spanroot.pos_ in ["NOUN", "PRON"]:
	            if "acl" in c_dep:
	                category = "Noun + Complement (attr)"
	            else:
	                category = "Nominal complement"

	        elif not subjless and spanroot.pos_ in verbal_pos:
	            category = "Main verb 4"

	        elif spanroot.tag_ in ['NNP']:
	            category = "Nominal complement"

	    ## External comp
	    if spanroot.dep_ in ['xcomp']:
	        if spanroot.head.pos_ == 'ADJ' and "to_aux" in c_t_dep_:
	            category = "Adjective complement to-cl"
	        if spanroot.head.pos_ == 'VERB' and "to_aux" in c_t_dep_:
	            category = "Verb complement to-cl"

	    if spanroot.dep_ in ['pcomp']:
	        if str(spanroot.morph) in [pres_part] and 'ccomp' in c_dep:
	            category = "Participle + that-cl"
	        elif str(spanroot.morph) in [pres_part]:
	            category = "Participle"

	    ## Simple classifier
	    if spanroot.dep_ in ['neg']:
	        category = "Negative particle"
	    if spanroot.dep_ in ['aux', 'auxpass']:
	        category = "Auxiliary"

	    # Modal verbs
	    if spanroot.tag_ == "MD":
	        category = "Modal auxiliary"

	    ####################################
	    ###           clausal           ####
	    ####################################
	    if spanroot.dep_ in ["ROOT", "advcl", "ccomp", 'acl', 'pcomp', 'relcl', 'punct']:
	        subtree = list(spanroot.subtree)

	        # infinitival "to" / progressive form attached to an xcomp in the subtree
	        _check_to = [t.dep_ for t in subtree if (t.dep_ in ["aux"] and t.pos_ in ["PART", "SCONJ"]) and t.head.dep_ == "xcomp"]
	        _check_ing = [t.dep_ for t in subtree if "Prog" in str(t.morph) and t.dep_ == "xcomp"]
	        # True for every ccomp child located after the root
	        root_before_ccomp = [t.i > spanroot.i for t in spanroot.children if t.dep_ == "ccomp"]

	        _check_for_to = ["_".join([t.norm_, t.dep_]) for t in subtree if t.head.dep_ == "advcl" and (t.dep_ == "mark" or t.dep_ == "aux")]

	        ## Start with broad category, which is then re-evaluated for specific constructions.
	        if spanroot.dep_ in ['advcl', 'acl', 'punct', 'pcomp']:  # 'mark',
	            ## Adverbial clauses
	            subjless = all(t.dep_ not in subject_deps for t in spanroot.children)

	            ### Finite-adverbial clauses
	            if "mark" in span_dep and (spanroot.pos_ in verbal_pos or "aux" in span_dep):
	                category = "Finite adverbial clause"

	            elif "mark" in span_dep and "aux" in span_dep:
	                category = "Finite adverbial clause"

	            elif "mark" in span_dep and spanroot.pos_ in verbal_pos and "expl" in c_dep:
	                category = "Finite adverbial clause"

	            elif "advmod" in span_dep and ("WRB" in span_tag or "WDT" in span_tag):
	                # NOTE(review): the two elifs below are read as part of this
	                # inner wh-chain -- confirm nesting against upstream.
	                if spanroot.pos_ in verbal_pos:
	                    category = "Finite adverbial clause"

	                elif spanroot.pos_ not in verbal_pos and subjless:
	                    category = "Non-finite adv clause 1"

	                elif not argmentless:
	                    category = "Finite adverbial clause"

	            ## non-finite
	            elif str(spanroot.morph) in [pres_part, past_part] and "aux" not in c_dep:
	                # he doing his job
	                if argmentless:
	                    # e.g., frankly speaking, strictly speaking
	                    category = "Adverbial Phrase"
	                else:
	                    category = "Non-finite adv clause 2"

	            elif spanroot.pos_ not in verbal_pos and "mark" in span_dep and subjless:
	                category = "Non-finite adv clause 3"

	            elif "aux" in c_dep and "TO" in c_tag:
	                category = "Adverbial Phrase"

	            elif "mark" not in span_dep and spanroot.pos_ in verbal_pos:
	                category = "Dependent Verb phrase"

	            elif not argmentless:
	                category = "Adverbial clause"

	            elif spanroot.dep_ == "advcl":
	                category = "Adverbial phrase"

	            else:
	                # NOTE(review): label carries a trailing space in the
	                # original; kept byte-for-byte -- confirm whether intended.
	                category = "Finite adverbial clause "

	        if spanroot.dep_ in ['relcl', 'ccomp', 'acl', 'punct', "pcomp"]:

	            head = spanroot.head
	            if ";" in [t.norm_ for t in head.children]:
	                category = "Main verb 3"

	            elif "nsubj" not in span_dep:
	                category = "Dependent verb 1"

	            elif "mark" in span_dep:
	                category = "Complement clause"
	            elif str(spanroot.morph) in [pres_part, past_part] and "aux" not in c_dep:
	                category = "Non-finite complement clause"
	            elif spanroot.dep_ in ['relcl']:
	                category = "Relative clause"
	            elif spanroot.dep_ in ['ccomp']:
	                category = "Complement clause"
	            elif spanroot.dep_ in ['acl']:
	                category = "Noun Complement clause"

	        ## Specific constructions
	        # Extraposed that-clause or to-infinitives
	        if ("it_nsubjpass" in c_t_dep_ or "it_nsubj" in c_t_dep_) and spanroot.pos_ in ["VERB", "AUX"]:
	            if ("acomp" in c_dep or "oprd" in c_dep) and "ccomp" in c_dep:
	                # eg it seems odd (oprd) that X.
	                # eg it is certain (acomp) that X.
	                category = "Extraposed that-cl (adj-complement)"

	            elif "xcomp" in c_dep or ("advcl" in c_dep):
	                if "for_mark" in _check_for_to:
	                    category = "Extraposed to-cl (explicit subj)"
	                elif _check_to:
	                    category = "Extraposed to-cl 1"  # eg It is possible to .
	                elif _check_ing:
	                    category = "Extraposed -ing 1"

	            # NOTE(review): the next three branches are read as siblings of
	            # the xcomp/advcl test above -- confirm nesting against upstream.
	            elif ("prep" in right_dep or "npadvmod" in right_dep) and "ccomp" in right_dep and spanroot.lemma_ == "be":
	                category = "Cleft construction"

	            elif "attr" in c_dep:
	                category = "Extraposed that-cl (copula)"  # eg It is a wonder that X.

	            else:
	                category = "Extraposed that-cl (VERB)"

	        elif ("it_nsubjpass" in c_t_dep_ or "it_nsubj" in c_t_dep_) and "acomp" in c_dep:
	            if "xcomp" in c_dep:
	                if _check_to:
	                    category = "Extraposed to-cl 2"  # eg it is difficult to decide.
	                elif _check_ing:
	                    category = "Extraposed -ing 2"

	            else:
	                category = "Extraposed that-cl (adj-complement) 2"

	        elif ("it_nsubjpass" in c_t_dep_ or "it_nsubj" in c_t_dep_) and "oprd" in c_dep:

	            category = "Extraposed that-cl (adj-complement) 3"  # eg it seems odd that X.

	        # something without dummy subject "it"
	        elif (("nsubj" in c_dep and spanroot.lemma_ in ['be']) or "nsubjpass" in c_dep) and spanroot.pos_ in ["AUX", 'VERB'] and "it" not in c_norm:

	            # xcomp / ccomp tokens whose head is an adjectival complement
	            _check_xcomp = [t.dep_ for t in subtree if t.dep_ in ["xcomp"] and t.head.dep_ == "acomp"]
	            _check_ccomp = [t.dep_ for t in subtree if t.dep_ in ["ccomp"] and t.head.dep_ == "acomp"]

	            if ("attr" in c_dep or "acomp" in c_dep) and "ccomp" in c_dep:
	                if any(root_before_ccomp):
	                    category = "Post-predicate that-cl"
	                else:
	                    category = "Comment clause"

	            elif ("attr" in c_dep or "acomp" in c_dep) and "ccomp" in _check_ccomp:
	                category = "Post-predicate that-cl 2"

	            elif ("attr" in c_dep or "acomp" in c_dep) and "xcomp" in _check_xcomp:
	                category = "Post-predicate to-cl"

	            elif "xcomp" in c_dep and spanroot.lemma_ in ['be'] and _check_to:
	                category = "Subject predicate to-cl"

	            elif "xcomp" in c_dep and "auxpass" in c_dep and _check_to:
	                category = "Subject predicate to-cl (passive)"

	            elif "xcomp" in c_dep and spanroot.lemma_ in ['be'] and _check_ing:
	                category = "Subject predicate -ing"
	            elif "ccomp" in c_dep:
	                category = "Subject predicate that-cl"
	            elif "acomp" in c_dep:
	                category = "Adjectival predicate"

	            elif "mark" in c_dep and ("nsubj" in c_dep or "nsubjpass" in c_dep):
	                category = "Finite-adverbial clause"
	            elif not argmentless and "SCONJ" in c_pos:
	                category = "Finite-adverbial clause"
	            else:
	                category = "Main verb 1"

	        ## without dummy subject it, and lexical verbs
	        # (the original condition had a stray ``in c_dep`` that made it always false)
	        elif ("nsubj" in c_dep or "nsubjpass" in c_dep) and spanroot.pos_ in ["AUX", 'VERB'] and "it" not in c_norm and spanroot.lemma_ not in ['be']:
	            _check_wh = [t.dep_ for t in subtree if (t.dep_ in ["attr", "advmod", 'dobj', 'nsubj'] and t.tag_ in ["WP", "WRB", "WDT", "WP$"]) and t.head.dep_ == "ccomp"]
	            _check_if = [t.dep_ for t in subtree if (t.dep_ in ["mark"] and t.norm_ in ["whether", "if"]) and t.head.dep_ == "ccomp"]

	            if "ccomp" in c_dep and (_check_wh or _check_if):
	                category = "Post-predicate wh-cl"

	            elif "ccomp" in c_dep:
	                if any(root_before_ccomp):
	                    category = "Post-predicate that-cl"
	                else:
	                    category = "Comment clause"

	            elif "xcomp" in c_dep:
	                if _check_to:
	                    category = "Post-predicate to-cl"
	                elif _check_ing:
	                    category = "Post-predicate -ing"

	        # Existential
	        elif "expl" in c_dep and "NOUN" in c_pos and "mark" not in c_dep:
	            category = "There is/are NOUN"

	        elif "ccomp" in c_dep and "it_nsubj" in span_t_dep_ and spanroot.pos_ in ["AUX"]:
	            category = "Cleft construction"

	    ### The end of clausal analysis

	    if spanroot.dep_ in ['parataxis']:
	        if "_".join(span_dep) in ["nsubj_parataxis", "aux_parataxis", "nsubj_aux_parataxis"]:
	            category = "Comment clause"
	        else:
	            category = "Parataxis"

	    if spanroot.dep_ in ['dep', "csubj", 'csubjpass']:
	        if spanroot.head.dep_ in ['ROOT', 'ccomp'] and spanroot.head.pos_ in ['AUX', 'VERB'] and spanroot.pos_ in ['AUX', 'VERB']:
	            if spanroot.morph == spanroot.head.morph:
	                category = "Main verb 4"
	            else:
	                category = "Dependent verb 2"
	        elif str(spanroot.morph) == pres_part:
	            category = "Gerund"
	        elif "VerbForm=Fin" in str(spanroot.morph) or "VerbForm=Inf" in str(spanroot.morph):
	            category = "Dependent verb 2"
	        elif spanroot.dep_ in ["csubj", 'csubjpass']:
	            category = "Dependent verb (csubj)"

	    # Appositive phrases
	    if spanroot.dep_ in ['appos']:
	        if "nummod" in c_dep:
	            category = "Apposition"
	        # separate ``if``: a POS-based label below overrides "Apposition"
	        if spanroot.pos_ in ["PROPN"]:
	            category = "Appositive Proper Nouns"
	        elif spanroot.pos_ in ["NOUN"]:
	            category = "Appositive Noun Phrase"
	        elif spanroot.pos_ in ["VERB", "AUX"]:
	            _check = any(t.dep_ in subject_deps for t in spanroot.children)
	            if _check:
	                category = "Appositive Finite-clause"

	    if spanroot.dep_ in ['appos', "dep", "attr"]:
	        if not subjless and spanroot.pos_ in verbal_pos:
	            category = "Main verb (likely parsing error)"

	    # sometimes the dep are on the conjunctions
	    if spanroot.dep_ in ["dep", "mark"]:
	        if spanroot.tag_ in ["RB", "IN", "CC"]:
	            category = "Conjunction"

	    if spanroot.dep_ in ["intj"]:
	        category = "Introjection"

	    # sometimes the extra-clausal links are not accurate; only used when no
	    # earlier rule produced a label
	    if spanroot.dep_ in ['aux', "auxpass", 'oprd', 'appos', "xcomp", "attr", 'dep', "meta", 'prt'] and category is None:
	        if spanroot.head.dep_ == "ROOT":
	            category = "Main verb"
	        else:
	            category = "dependent verb 5"

	    if span.label_ == "CITATION":
	        if "NNP" in span_tag or "NNPS" in span_tag:
	            if span_dep[0] == 'punct' and span_dep[-1] == 'punct':
	                category = "Parenthetical Citation"
	            elif span_tag[0] in ["NNP", "NNPS"]:
	                category = "Narrative Citation"
	            else:
	                # NOTE(review): ``else`` read as closing the inner chain.
	                category = "Other Citation"

	    # Fall back to the raw dependency label when nothing matched.
	    if category is None:
	        category = spanroot.dep_

	    return category



	def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
	                spans_key: str = "sc",
	                attrs: List[str] = SPAN_ATTRS):
	    """Tabulate the spans under ``doc.spans[spans_key]`` with grammatical detail.

	    Every row starts with the stringified ``attrs`` of the span and its score
	    from the span group's ``attrs['scores']``, followed by the sentence index,
	    the label from ``construction_classifier2`` and several properties of the
	    span's root token and of the tokens inside the span.

	    Args:
	        doc: Object exposing ``spans[spans_key]`` and ``sents``.
	        spans_key: Key of the span group to read.
	        attrs: Attribute names fetched from each span with ``getattr``.

	    Returns:
	        tuple: ``(rows, columns)``; the entries of each row follow the order
	        of ``columns``.
	    """
	    columns = attrs + ["Conf. score", "sent no.", "grammatical realization", 'span dep', "ner",
	                       "POS", 'span dep seq', "TAG sequence", "POS sequence", "head", "head dep", "children", "morphology", "sent"]
	    # sentence -> running index, used for the "sent no." column
	    sentences = {sent: idx for idx, sent in enumerate(doc.sents)}
	    span_group = doc.spans[spans_key]

	    data = []
	    for span, score in zip(span_group, span_group.attrs['scores']):
	        row = [str(getattr(span, name)) for name in attrs]
	        row += [
	            score,
	            int(sentences[span.sent]),
	            construction_classifier2(doc, span),
	            span.root.dep_,
	            span.root.ent_type_,
	            span.root.tag_,
	            "_".join([tok.dep_ for tok in span]),
	            "_".join([tok.tag_ for tok in span]),
	            "_".join([tok.pos_ for tok in span]),
	            span.root.head.norm_,
	            span.root.head.dep_,
	            "_".join([child.dep_ for child in span.root.children]),
	            span.root.morph,  # stored as returned by the token, not stringified
	            span.sent.text.strip(),
	        ]
	        data.append(row)

	    return data, columns


	def ngrammar(seq: list, n=2, concat = False, sep = "-"):
	    """Return every contiguous window of ``n`` items taken from ``seq``.

	    Args:
	        seq: Sequence to slide over (must support ``len`` and slicing).
	        n: Window length.
	        concat: When true, each window is joined into one string with ``sep``;
	            otherwise the sliced windows themselves are returned.
	        sep: Separator used when ``concat`` is true.

	    Returns:
	        list: The windows in order of their start position; empty when
	        ``seq`` holds fewer than ``n`` items.
	    """
	    total = len(seq)
	    # a start position qualifies only if a full window fits behind it
	    windows = [seq[start: start + n] for start in range(total) if start + n <= total]
	    if concat:
	        return [sep.join(window) for window in windows]
	    return windows


	def diversity_values(count_vec: list):
	    """Compute four alpha-diversity measures for a vector of counts.

	    An empty ``count_vec`` is replaced by ten zeros before the measures are
	    computed.  Gini index and Faith's PD are not computed.

	    Args:
	        count_vec: Counts per category; anything ``len()`` and ``list()``
	            accept.

	    Returns:
	        dict: Keys ``'shannon'`` (base 2), ``'brillouin_d'``, ``'simpson_d'``
	        (one minus the value of ``dv.alpha.simpson``) and ``'simpson_e'``.
	    """
	    if len(count_vec) == 0:
	        count_vec = [0] * 10

	    return {
	        'shannon': dv.alpha.shannon(list(count_vec), base=2),
	        'brillouin_d': dv.alpha.brillouin_d(list(count_vec)),
	        "simpson_d": 1 - dv.alpha.simpson(list(count_vec)),
	        'simpson_e': dv.alpha.simpson_e(list(count_vec)),
	    }