Spaces:

huspacy
/

example-applications

Runtime error

App Files Files Community

Szabó Gergő committed on Jun 21, 2022

Commit

2ecc574

1 Parent(s): 19cfb7e

triples package

Browse files

Files changed (2) hide show

examples/relation.py +12 -12
resources/triples.py +125 -0

examples/relation.py CHANGED Viewed

@@ -1,12 +1,9 @@
 import gradio as gr
 import spacy
-import sys
 import pandas as pd
-from spacy import displacy
-sys.path.append('/home/gszabo/PycharmProjects/textacy/textacy/src/textacy/extract')
-import triples
 nlp = spacy.load("hu_core_news_lg")
@@ -41,14 +38,17 @@ def process(text: str) -> pd.DataFrame:
     return pd.DataFrame(relation_list, columns=['Subject', 'Verb', 'Object'])
-EXAMPLES = ["Vespucci 1450-es években született Firenzében, és 1497 és 1504 között legalább két felfedező úton vett részt.",
-            "Einstein megmutatta, ha feltételezi, hogy a fény valóban csak diszkrét csomagokban terjed, akkor meg tudja magyarázni a fényelektromos jelenség furcsa tulajdonságait.",
-            "Einstein megállapította, hogy hasonló energiaeloszlás lehet érvényes az atomokra is.",
-            "Hawking úgy nyilatkozott, hogy a felfedezései az élete legizgalmasabb eseményei voltak."]
-# displacy checker
-# text = nlp(EXAMPLES[3])
-# displacy.serve(text, style="dep")
 demo = gr.Interface(
     fn=process,

 import gradio as gr
 import spacy
 import pandas as pd
+from resources import triples
 nlp = spacy.load("hu_core_news_lg")
     return pd.DataFrame(relation_list, columns=['Subject', 'Verb', 'Object'])
+EXAMPLES = ["Anna éppen most házat épít magának.",
+            "András főzni fog, ha haza ért.",
+            "Jéghideg narancslevet fogok kortyolni Mallorca homokos partján.",
+            "Júliska fagyit fog árulni.",
+            "Einstein megmutatta, hogy hogyan kell házat építeni.",
+            "Vespucci 1497 és 1504 között legalább két felfedező úton vett részt.",
+            "Einstein megállapította, hogy az atomokra hasonló energiaeloszlás lehet érvényes.",
+            "Hawking úgy nyilatkozott, hogy a felfedezései az élete legizgalmasabb eseményei voltak.",
+            "Einstein megmutatta, ha feltételezi, hogy a fény valóban csak diszkrét csomagokban terjed, akkor meg tudja magyarázni a fényelektromos jelenség furcsa tulajdonságait."]
+# process(EXAMPLES[4])
 demo = gr.Interface(
     fn=process,

resources/triples.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"""
+Triples
+-------
+:mod:`textacy.extract.triples`: Extract structured triples from a document or sentence
+through rule-based pattern-matching of the annotated tokens.
+"""
+from __future__ import annotations
+import collections
+from operator import attrgetter
+from typing import Iterable, List, Tuple
+from spacy.symbols import (
+    AUX, VERB,
+    agent, attr, aux, auxpass, csubj, csubjpass, dobj, neg, nsubj, nsubjpass, obj, pobj, xcomp,
+)
+from spacy.tokens import Span, Token
+from textacy import types
+# Dependency labels marking a nominal subject of an active or passive verb.
+_NOMINAL_SUBJ_DEPS = {nsubj, nsubjpass}
+# Dependency labels marking a clausal subject of an active or passive verb.
+_CLAUSAL_SUBJ_DEPS = {csubj, csubjpass}
+# NOTE(review): not referenced anywhere in the visible part of this module.
+_ACTIVE_SUBJ_DEPS = {csubj, nsubj}
+# Dependency labels of the verb children that expand_verb() attaches to a verb.
+_VERB_MODIFIER_DEPS = {aux, auxpass, neg}
+# Record yielded by subject_verb_object_triples(); each field holds a list of tokens.
+# NOTE(review): the annotation describes the fields of an instance, not the
+# namedtuple class that is actually assigned here.
+SVOTriple: Tuple[List[Token], List[Token], List[Token]] = collections.namedtuple(
+    "SVOTriple", ["subject", "verb", "object"]
+)
+def subject_verb_object_triples(doclike: types.DocLike) -> Iterable[SVOTriple]:
+    """
+    Extract an ordered sequence of subject-verb-object triples from a document
+    or sentence.
+    Args:
+        doclike
+    Yields:
+        Next SVO triple as (subject, verb, object), in approximate order of appearance.
+    """
+    if isinstance(doclike, Span):
+        sents = [doclike]
+    else:
+        sents = doclike.sents
+    for sent in sents:
+        # connect subjects/objects to direct verb heads
+        # and expand them to include conjuncts, compound nouns, ...
+        verb_sos = collections.defaultdict(lambda: collections.defaultdict(set))
+        for tok in sent:
+            head = tok.head
+            # ensure entry for all verbs, even if empty
+            # to catch conjugate verbs without direct subject/object deps
+            if tok.pos == VERB:
+                _ = verb_sos[tok]
+            # nominal subject of active or passive verb
+            if tok.dep in _NOMINAL_SUBJ_DEPS:
+                if head.pos == VERB:
+                    verb_sos[head]["subjects"].update(expand_noun(tok))
+            # clausal subject of active or passive verb
+            elif tok.dep in _CLAUSAL_SUBJ_DEPS:
+                if head.pos == VERB:
+                    verb_sos[head]["subjects"].update(tok.subtree)
+            # nominal direct object of transitive verb
+            elif tok.dep == obj:
+                if head.pos == VERB:
+                    verb_sos[head]["objects"].update(expand_noun(tok))
+            # prepositional object acting as agent of passive verb
+            elif tok.dep == pobj:
+                if head.dep == agent and head.head.pos == VERB:
+                    verb_sos[head.head]["objects"].update(expand_noun(tok))
+            # open clausal complement, but not as a secondary predicate
+            elif tok.dep == xcomp:
+                if (
+                    head.pos == VERB
+                    and not any(child.dep == obj for child in head.children)
+                ):
+                    # TODO: just the verb, or the whole tree?
+                    # verb_sos[verb]["objects"].update(expand_verb(tok))
+                    verb_sos[head]["objects"].update(tok.subtree)
+        # fill in any indirect relationships connected via verb conjuncts
+        for verb, so_dict in verb_sos.items():
+            conjuncts = verb.conjuncts
+            if so_dict.get("subjects"):
+                for conj in conjuncts:
+                    conj_so_dict = verb_sos.get(conj)
+                    if conj_so_dict and not conj_so_dict.get("subjects"):
+                        conj_so_dict["subjects"].update(so_dict["subjects"])
+            if not so_dict.get("objects"):
+                so_dict["objects"].update(
+                    obj
+                    for conj in conjuncts
+                    for obj in verb_sos.get(conj, {}).get("objects", [])
+                )
+        # expand verbs and restructure into svo triples
+        for verb, so_dict in verb_sos.items():
+            if so_dict["subjects"] and so_dict["objects"]:
+                yield SVOTriple(
+                    subject=sorted(so_dict["subjects"], key=attrgetter("i")),
+                    verb=sorted(expand_verb(verb), key=attrgetter("i")),
+                    object=sorted(so_dict["objects"], key=attrgetter("i")),
+                )
+def expand_noun(tok: Token) -> List[Token]:
+    """Expand a noun token to include all associated conjunct and compound nouns."""
+    tok_and_conjuncts = [tok] + list(tok.conjuncts)
+    compounds = [
+        child
+        for tc in tok_and_conjuncts
+        for child in tc.children
+        # TODO: why doesn't compound import from spacy.symbols?
+        if child.dep_ == "compound"
+    ]
+    return tok_and_conjuncts + compounds
+def expand_verb(tok: Token) -> List[Token]:
+    """Expand a verb token to include all associated auxiliary and negation tokens."""
+    verb_modifiers = [
+        child for child in tok.children if child.dep in _VERB_MODIFIER_DEPS
+    ]
+    return [tok] + verb_modifiers