Spaces:

egumasa
/

engagement-analyzer-demo

Running

App Files Files Community

engagement-analyzer-demo / pipeline /post_processors.py

egumasa

updated model

7cf7080 over 1 year ago

raw

history blame

No virus

6.34 kB


	from typing import List, Sequence, Tuple, Optional, Dict, Union, Callable
	import pandas as pd
	import spacy
	from spacy.language import Language

	# Default span attributes: simple_table and const_table read each one with
	# getattr() and stringify it to form the leading columns of every row.
	SPAN_ATTRS = ["text", "label_", "start", "end"]


	def simple_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
	                 spans_key: str = "sc",
	                 attrs: List[str] = SPAN_ATTRS):
	    """Return one table row per span stored under ``doc.spans[spans_key]``.

	    Args:
	        doc: Object exposing ``doc.spans[spans_key]``. The annotation also
	            admits a dict, but the body reads ``doc.spans``, so only a
	            Doc-like object works here.
	        spans_key: Key of the span group to tabulate.
	        attrs: Span attribute names; each is read with ``getattr`` and
	            converted to ``str``.

	    Returns:
	        A ``(data, columns)`` tuple. ``columns`` is ``attrs`` followed by
	        ``"Conf. score"``; each row in ``data`` holds the stringified
	        attributes followed by the span's score, left unformatted.

	    Raises:
	        KeyError: If the span group or its ``'scores'`` entry is missing.
	    """
	    columns = [*attrs, "Conf. score"]

	    # Look the span group up once; the scores live in its ``attrs`` mapping
	    # and are paired with the spans positionally.
	    span_group = doc.spans[spans_key]
	    scores = span_group.attrs['scores']

	    data = [
	        [str(getattr(span, attr)) for attr in attrs] + [score]
	        for span, score in zip(span_group, scores)
	    ]
	    return data, columns


	# def span_info_aggregator()

	def construction_classifier(doc, span):
	    """Label the grammatical construction that realizes ``span``.

	    The label starts as the dependency relation of ``span.root`` and is then
	    overwritten by a sequence of rule checks. When several rules match, the
	    last one to fire wins, so the order of the checks below matters.

	    Args:
	        doc: Unused; kept so existing callers keep working.
	        span: Span-like object providing ``root``, ``label_`` and iteration
	            over tokens that expose ``norm_``, ``dep_``, ``tag_``, ``pos_``,
	            ``morph``, ``head`` and ``children``.

	    Returns:
	        The category label (a dependency label or one of the descriptive
	        names assigned below).
	    """
	    # Compared against str(token.morph). The literal must use a plain "|":
	    # a backslash-escaped "\|" keeps the backslash in the string and can
	    # never match, which made the participle rules unreachable.
	    pres_participle = "Aspect=Prog|Tense=Pres|VerbForm=Part"

	    category = span.root.dep_
	    spanroot = span.root

	    # Per-token views of the span used by the rules below.
	    span_t_dep_ = ["_".join([t.norm_, t.dep_]) for t in span]
	    span_dep = [t.dep_ for t in span]
	    span_token = [t.norm_ for t in span]
	    span_tag = [t.tag_ for t in span]

	    # Children are read from the span's own root, before any conj/poss
	    # chain is followed upwards.
	    c_dep = [c.dep_ for c in spanroot.children]
	    c_pos = [c.pos_ for c in spanroot.children]

	    has_it_subject = ("it_nsubjpass" in span_t_dep_
	                      or "it_nsubj" in span_t_dep_)

	    ## nesting classifiers
	    # Climb out of coordination first, then out of possessive chains, so
	    # the rules below see the relation of the governing token.
	    while spanroot.dep_ == "conj":
	        spanroot = spanroot.head
	    while spanroot.dep_ == "poss":
	        spanroot = spanroot.head

	    ## Simple classifier
	    if spanroot.dep_ == "pcomp":
	        if str(spanroot.morph) == pres_participle:
	            category = "Gerund"

	    if spanroot.dep_ in ("pobj", "dobj", "obj", "iobj"):
	        category = "Object"
	    if spanroot.dep_ in ("nsubj", "nsubjpass"):
	        category = "Subject"
	    if spanroot.dep_ == "cc":
	        category = "Coordinating conjunction"

	    if spanroot.dep_ in ("ROOT", "advcl"):
	        if has_it_subject and (
	                ("ccomp" in c_dep and "auxpass" in c_dep)
	                or ("nsubj" in c_dep and "acomp" in c_dep)
	                or ("nsubj" in c_dep and "oprd" in c_dep)):
	            category = "It is X that-clause"
	        elif ("nsubj" in c_dep and "it" in span_token
	              and spanroot.pos_ == "VERB"):
	            category = "It VERB that-clause"
	        elif "expl" in c_dep and "NOUN" in c_pos:
	            category = "There is/are NOUN"
	        elif spanroot.pos_ in ("AUX", "VERB"):
	            category = "Main verb"
	        else:
	            # Must belong to this inner chain: attached to the outer
	            # test it would undo the Object/Subject labels set above.
	            category = spanroot.dep_

	    if spanroot.dep_ == "attr":
	        c_head = [c.dep_ for c in spanroot.head.children]
	        if "expl" in c_head and "no_det" in span_t_dep_:
	            category = "There is/are no NOUN"

	    # Modal verbs
	    if spanroot.tag_ == "MD":
	        category = "Modal auxiliary"
	    # prep phrases
	    if spanroot.dep_ == "prep":
	        category = 'Prepositional Phrase'
	    # adverbial phrases
	    if spanroot.dep_ == "advmod":
	        category = "Adverbial modifier"
	    # adjectival complements
	    if spanroot.dep_ == "acomp":
	        category = "Adjectival complement"

	    if spanroot.dep_ == "neg":
	        category = "Negative particle"

	    # Preconjunctions
	    if spanroot.dep_ == "preconj":
	        category = "Conjunction"

	    # Adverbial clauses
	    ## Check the status of the adverbial clauses carefully
	    if spanroot.dep_ in ("advcl", "mark", "acl"):
	        if "mark" in span_dep:
	            category = "Finite adverbial clause"
	        # Independent of the test above, so a participle without an
	        # auxiliary overrides "Finite adverbial clause".
	        if str(spanroot.morph) == pres_participle and "aux" not in c_dep:
	            category = "Non-finite adv clause"

	    if spanroot.dep_ in ("relcl", "ccomp"):
	        head = spanroot.head
	        if ";" in [t.norm_ for t in head.children]:
	            category = "Main verb"
	        elif "nsubj" not in span_dep:
	            category = "Dependent verb"

	    if spanroot.dep_ == "dep":
	        if (spanroot.head.dep_ in ("ROOT", "ccomp")
	                and spanroot.head.pos_ in ("AUX", "VERB")
	                and spanroot.pos_ in ("AUX", "VERB")):
	            # NOTE(review): nesting reconstructed from flattened source;
	            # the else is bound to the morphology comparison. Confirm.
	            if spanroot.morph == spanroot.head.morph:
	                category = "Main verb"
	            else:
	                category = "Dependent verb"

	    # Citation spans override whatever the syntactic rules decided.
	    if span.label_ == "CITATION":
	        if "NNP" in span_tag or "NNPS" in span_tag:
	            # NOTE(review): nesting reconstructed from flattened source;
	            # the final else is bound to this inner chain. Confirm.
	            if span_dep[0] == 'punct' and span_dep[-1] == 'punct':
	                category = "Parenthetical Citation"
	            elif span_tag[0] in ("NNP", "NNPS"):
	                category = "Narrative Citation"
	            else:
	                category = "Other Citation"

	    return category


	def const_table(doc: Union[spacy.tokens.Doc, Dict[str, str]],
	                spans_key: str = "sc",
	                attrs: List[str] = SPAN_ATTRS):
	    """Return one detailed table row per span in ``doc.spans[spans_key]``.

	    Args:
	        doc: Object exposing ``doc.spans[spans_key]`` and ``doc.sents``. The
	            annotation also admits a dict, but the body reads ``doc.spans``,
	            so only a Doc-like object works here.
	        spans_key: Key of the span group to tabulate.
	        attrs: Span attribute names; each is read with ``getattr`` and
	            converted to ``str``.

	    Returns:
	        A ``(data, columns)`` tuple. Each row in ``data`` lines up with
	        ``columns``: the stringified ``attrs``, then score, sentence index,
	        the label from ``construction_classifier``, and root/sequence
	        features of the span.

	    Raises:
	        KeyError: If the span group or its ``'scores'`` entry is missing.
	    """
	    columns = attrs + ["Conf. score", "sent no.", "grammatical realization", 'span dep', "ner",
	                       "POS", 'span dep seq', "POS sequence", "head", "children", "morphology", ]

	    # Map each sentence to its position so a span's sentence number is a
	    # dictionary lookup on ``span.sent``.
	    sentences = {s: i for i, s in enumerate(doc.sents)}

	    span_group = doc.spans[spans_key]
	    data = []
	    for span, score in zip(span_group, span_group.attrs['scores']):
	        root = span.root  # looked up once per span instead of per column

	        span_info = [str(getattr(span, attr)) for attr in attrs]
	        span_info += [
	            score,
	            sentences[span.sent],
	            construction_classifier(doc, span),
	            root.dep_,
	            root.ent_type_,
	            # The "POS" columns are filled from the fine-grained ``tag_``.
	            root.tag_,
	            "_".join([t.dep_ for t in span]),
	            "_".join([t.tag_ for t in span]),
	            root.head.norm_,
	            "_".join([c.dep_ for c in root.children]),
	            # Stored as returned by ``root.morph`` (not stringified).
	            root.morph,
	        ]
	        data.append(span_info)

	    return data, columns


	def ngrammar(seq: list, n=2):
	    """Return every contiguous length-``n`` slice of ``seq``, in order.

	    Args:
	        seq: Sliceable sequence (a list, or anything supporting ``len`` and
	            slicing).
	        n: Window length; defaults to 2 (bigrams).

	    Returns:
	        A list of slices ``seq[i:i + n]`` for every start index ``i`` whose
	        window fits inside ``seq``; empty when ``seq`` is shorter than ``n``.
	    """
	    n_item = len(seq)
	    # Keep only the start positions whose full window fits in the sequence.
	    return [seq[idx: idx + n] for idx in range(n_item) if idx + n <= n_item]