oroszgy committed on
Commit
539b929
2 Parent(s): b27f1b3 265d133

Merge pull request #3 from huspacy/relation

Browse files
README.md CHANGED
@@ -19,4 +19,4 @@ This repository presents some practical examples on using HuSpaCy for various te
19
 
20
  To start the demo: `poetry run python app.py`
21
 
22
- If you upgrade dependencies via poetry don't forget to update `requirements.txt` with ` poetry export --without-hashes > requirements.txt`
 
19
 
20
  To start the demo: `poetry run python app.py`
21
 
22
+ If you upgrade dependencies via poetry don't forget to update `requirements.txt` with `poetry export --without-hashes > requirements.txt`
app.py CHANGED
@@ -1,14 +1,15 @@
1
  import gradio as gr
2
  from examples.keyphrases import demo as keyphrases_demo
3
  from examples.anon import demo as anon_demo
 
4
 
5
  demo = gr.Blocks()
6
 
7
  with demo:
8
  gr.Markdown("# HuSpaCy Examples")
9
  gr.TabbedInterface(
10
- interface_list=[keyphrases_demo, anon_demo],
11
- tab_names=["Keyphrase extraction", "Text anonymization"],
12
  )
13
 
14
  if __name__ == '__main__':
 
1
  import gradio as gr
2
  from examples.keyphrases import demo as keyphrases_demo
3
  from examples.anon import demo as anon_demo
4
+ from examples.relation import demo as relation_demo
5
 
6
  demo = gr.Blocks()
7
 
8
  with demo:
9
  gr.Markdown("# HuSpaCy Examples")
10
  gr.TabbedInterface(
11
+ interface_list=[keyphrases_demo, anon_demo, relation_demo],
12
+ tab_names=["Keyphrase extraction", "Text anonymization", "Relation Extraction"],
13
  )
14
 
15
  if __name__ == '__main__':
examples/anon.py CHANGED
@@ -1,26 +1,26 @@
1
  from typing import Tuple, List
2
 
3
  import gradio as gr
4
-
5
  from presidio_analyzer import AnalyzerEngine
6
- from presidio_analyzer.nlp_engine import NlpEngineProvider
7
  from presidio_anonymizer import AnonymizerEngine
8
-
9
- from faker import Faker
10
  from presidio_anonymizer.entities.engine import OperatorConfig
 
11
 
 
12
 
13
- def process(text: str, fake_data: bool, entities: List) -> Tuple[str, List]:
14
- configuration = {
15
- "nlp_engine_name": "spacy",
16
- "models": [{"lang_code": "hu", "model_name": "hu_core_news_lg", }],
17
- }
18
 
19
- provider = NlpEngineProvider(nlp_configuration=configuration)
20
- nlp_engine = provider.create_engine()
 
 
 
 
 
 
21
 
22
- analyzer = AnalyzerEngine(nlp_engine=nlp_engine,
23
- supported_languages=["hu"],)
24
 
25
  results = analyzer.analyze(
26
  text=text, entities=entities, language="hu")
@@ -42,17 +42,25 @@ def process(text: str, fake_data: bool, entities: List) -> Tuple[str, List]:
42
 
43
  anonymizer = AnonymizerEngine()
44
  anonymized_text = anonymizer.anonymize(
45
- text=text, analyzer_results=results, operators=fake_operators) if fake_data else anonymizer.anonymize(text=text, analyzer_results=results)
 
46
 
47
  return anonymized_text.text, anonymized_text.items
48
 
49
 
50
  EXAMPLES = [
51
- ["Vespucci 1450-es években született Firenzében, és 1497 és 1504 között legalább két felfedező úton vett részt – az egyiket spanyol, a másikat portugál támogatással.", False, ["PERSON", "LOCATION"]],
52
- ["Elon Musk 1971-ben született a Dél-afrikai Köztársaságban, anyja Maye Musk (született: Haldeman) modell, apja Errol Musk mérnök, pilóta.", True, [
 
 
 
 
53
  "PERSON", "LOCATION"]],
54
- ["Vespucci 1450-es években született Firenzében, és 1497 és 1504 között legalább két felfedező úton vett részt. Bárorító leveleket a vespucci@deojeda.es email-címre várt, mellette működött egy hangrögzítője is a +3903827802737 telefonszámon. Adományokat a bitcoin tárcájába (1Fsb3io3hj1jKaRCTRQ89Du88Dp7NxgEcU), bankkártyájára (5200 8282 8282 8210) és IBAN számlaszámára (ES8201289482186115378819) fogadott. Utazási blogja a https://firenze.it/vespucci címen volt elérhető. Legutóbb 1503-03-15-én publikált, ezt a 192.168.0.1 ip-címről tette meg.", True,
55
- ["PERSON", "LOCATION", "EMAIL_ADDRESS", "PHONE_NUMBER", "CRYPTO", "IP_ADDRESS", "URL", "DATE_TIME", "CREDIT_CARD", "IBAN_CODE"]],
 
 
 
56
  ]
57
 
58
  demo = gr.Interface(
@@ -60,7 +68,9 @@ demo = gr.Interface(
60
  inputs=[gr.Textbox(value=EXAMPLES[0][0], lines=10, label="Input text", show_label=True),
61
  gr.Checkbox(value=EXAMPLES[0][1],
62
  label="Apply de-identification", show_label=True),
63
- gr.CheckboxGroup(['PERSON', 'LOCATION', 'DATE_TIME', 'IP_ADDRESS', 'URL', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'CREDIT_CARD', 'IBAN_CODE', 'CRYPTO'], label="Entities", show_label=True, value=EXAMPLES[0][2])],
 
 
64
  outputs=[gr.Textbox(label="Anonymized text", show_label=True),
65
  gr.Textbox(label="Tags", show_label=True)],
66
  examples=EXAMPLES,
 
1
  from typing import Tuple, List
2
 
3
  import gradio as gr
4
+ from faker import Faker
5
  from presidio_analyzer import AnalyzerEngine
6
+ from presidio_analyzer.nlp_engine import SpacyNlpEngine
7
  from presidio_anonymizer import AnonymizerEngine
 
 
8
  from presidio_anonymizer.entities.engine import OperatorConfig
9
+ from spacy import Language
10
 
11
+ from examples.common import NLP
12
 
 
 
 
 
 
13
 
14
# noinspection PyMissingConstructor
class HuSpaCyNlpEngine(SpacyNlpEngine):
    """Presidio NLP engine backed by an already-loaded spaCy pipeline.

    The parent constructor is intentionally not called: the engine simply
    registers the given pipeline under the Hungarian language code.
    NOTE(review): presumably this avoids the parent loading its own models;
    confirm against the installed presidio version.
    """

    def __init__(self, nlp: Language):
        # Map the "hu" language code to the supplied pipeline.
        pipelines = {"hu": nlp}
        self.nlp = pipelines
18
+
19
+
20
+ def process(text: str, fake_data: bool, entities: List) -> Tuple[str, List]:
21
+ nlp_engine = HuSpaCyNlpEngine(NLP)
22
 
23
+ analyzer = AnalyzerEngine(nlp_engine=nlp_engine, supported_languages=["hu"])
 
24
 
25
  results = analyzer.analyze(
26
  text=text, entities=entities, language="hu")
 
42
 
43
  anonymizer = AnonymizerEngine()
44
  anonymized_text = anonymizer.anonymize(
45
+ text=text, analyzer_results=results, operators=fake_operators) if fake_data else anonymizer.anonymize(text=text,
46
+ analyzer_results=results)
47
 
48
  return anonymized_text.text, anonymized_text.items
49
 
50
 
51
  EXAMPLES = [
52
+ [
53
+ "Vespucci 1450-es években született Firenzében, és 1497 és 1504 között legalább két felfedező úton vett részt – az egyiket spanyol, a másikat portugál támogatással.",
54
+ False, ["PERSON", "LOCATION"]],
55
+ [
56
+ "Elon Musk 1971-ben született a Dél-afrikai Köztársaságban, anyja Maye Musk (született: Haldeman) modell, apja Errol Musk mérnök, pilóta.",
57
+ True, [
58
  "PERSON", "LOCATION"]],
59
+ [
60
+ "Vespucci 1450-es években született Firenzében, és 1497 és 1504 között legalább két felfedező úton vett részt. Bárorító leveleket a vespucci@deojeda.es email-címre várt, mellette működött egy hangrögzítője is a +3903827802737 telefonszámon. Adományokat a bitcoin tárcájába (1Fsb3io3hj1jKaRCTRQ89Du88Dp7NxgEcU), bankkártyájára (5200 8282 8282 8210) és IBAN számlaszámára (ES8201289482186115378819) fogadott. Utazási blogja a https://firenze.it/vespucci címen volt elérhető. Legutóbb 1503-03-15-én publikált, ezt a 192.168.0.1 ip-címről tette meg.",
61
+ True,
62
+ ["PERSON", "LOCATION", "EMAIL_ADDRESS", "PHONE_NUMBER", "CRYPTO", "IP_ADDRESS", "URL", "DATE_TIME",
63
+ "CREDIT_CARD", "IBAN_CODE"]],
64
  ]
65
 
66
  demo = gr.Interface(
 
68
  inputs=[gr.Textbox(value=EXAMPLES[0][0], lines=10, label="Input text", show_label=True),
69
  gr.Checkbox(value=EXAMPLES[0][1],
70
  label="Apply de-identification", show_label=True),
71
+ gr.CheckboxGroup(
72
+ ['PERSON', 'LOCATION', 'DATE_TIME', 'IP_ADDRESS', 'URL', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'CREDIT_CARD',
73
+ 'IBAN_CODE', 'CRYPTO'], label="Entities", show_label=True, value=EXAMPLES[0][2])],
74
  outputs=[gr.Textbox(label="Anonymized text", show_label=True),
75
  gr.Textbox(label="Tags", show_label=True)],
76
  examples=EXAMPLES,
examples/common.py CHANGED
@@ -5,7 +5,7 @@ from typing import Dict, List
5
  import spacy
6
  from spacy import Language
7
 
8
- NLP: Language = spacy.load("hu_core_news_lg")
9
 
10
 
11
  def _compute_idf(freq_file: Path) -> Dict[str, float]:
@@ -26,31 +26,3 @@ def _compute_idf(freq_file: Path) -> Dict[str, float]:
26
 
27
 
28
  IDF: Dict[str, float] = _compute_idf(Path(__file__).parent.parent / "resources" / "freq.list")
29
-
30
- NEWS_EXAMPLES = [
31
- """A magyar futballválogatott negyedik Nemzetek Ligája mérkőzésén másodszor nyert, a hazai 1-0-s siker után idegenben 4-0-val megsemmisítette Angliát. Nagy győzelem volt, az enervált angolokat a saját közönségük fütyülte ki, miután a második félidőben el sem találták a kaput. 96 éve nem kaptak ekkora verést az angolok hazai pályán.
32
- Az angol kapitány, Gareth Southgate kilenc helyen változtatott azon a csapaton, amelyik az olaszok ellen gól nélküli döntetlent játszott szombaton. Marco Rossi a kapuban cserélt, Dibusz Dénes állt a gólvonalon, Schäfer András visszatért a középpályára, miután a németek ellen letöltötte eltiltását. A 3-4-2-1-es felállás ezúttal sem változott. Ha végig akarja nézni helyszíni közvetítésünket, mit műveltek az angolok a Himnusz alatt, itt megteheti.
33
- Az első helyzetre a hatodik percig kellett várni, akkor Kane passzából James húzott el a bal oldalon, középre adta a labdát, Bowen fejelt, Nagy Zsolt a helyén volt, és mentett.
34
- Az első magyar lövés rögtön a kapuban landolt.
35
- Szoboszlai ívelt be szabadrúgást a tizenhatoson belülre, Lang és Stone ugrott fel fejelni, a magyar védő megelőzte ellenfelét. A labda Kane talpa alatt elcsúszott, Sallai combbal átvette, azonnal lőtt 7 méterről, mielőtt még Phillips odaért volna, Ramsdale ugyan beleért, de kiütni már nem tudta. Szoboszlainak volt egy másik, sokkal közelebbről elvégzett szabadrúgása, amit igen veszélyesen lőtt be a kapu elé, James a gólvonalról mentett, Szalai Attila elől. A kipattanóból az angolok egy gyors kontrát vezettek, de a magyar tízes visszafutott, és a tizenhatoson belül szerelni tudott.
36
- A magyar válogatott nem volt nagy nyomás alatt, az angolok akkor jártak legközelebb a gólhoz, amikor Orbán a 36. percben a saját kapuja felé fejelt, de Dibusz akkor is a helyén volt. A hazai szögletek veszélyesek voltak, de egyik sem annyira, hogy a szívünkhöz kellett volna kapnunk.
37
- A második félidőben az angolok felgyorsították a játékukat, de igazán komoly helyzetet nem tudtak kialakítani, Rossi pedig már az 55. percben érezte, hogy frissíteni kell, és Szoboszlai helyére Gazdag, Styles helyére pedig Nagy Ádám állt.
38
- Mivel a válogatott visszaállt, és fegyelmezetten zárta le a területeket, az angolok ötlettelen, olykor lassú adogatása veszélytelen volt. A csapat ezúttal is igazolta, mennyire képes megnehezíteni, megkeseríteni a riválisai dolgát.
39
- A hajrában jött a varázslat Most azonban azt is igazolta, hogy egy pillanat alatt a kapu elé tud kerülni. A Szalai helyére beálló Ádám Martin megharcolt egy labdáért, megtartotta a térfél közepén, majd tökéletesen szöktette Sallait, aki a tizenhatoson belül állítgatás nélkül jobb külsővel elrúgta a labdát Ramsdale lába mellett.
40
- A 77. percben Kane használható labdát kapott a szélről, kilenc méterről fejelt, a kapufáról pattant vissza a labda a mezőnybe, a center megpróbálta átvenni, de másodszorra már nem tudta, így odalett a helyzet. A 81. percben Magyarország berúgta a harmadik gólt.
41
- Nego fejesét még hárították a védők, a kipattanót készítette le Ádám Martin a támadást kísérő Nagy Zsolt elé, aki 17 méterről külsővel, állítgatás nélkül, pazarul lőtte ki a jobb alsó sarkot. A 29 éves felcsúti védő ennek a 11 napnak a legnagyobb felfedezettje, mert a németek ellen is eredményes volt szombaton.
42
- Stonest a hajrában még kiállították, a mieink nem törekedtek még jobban a gólkülönbség javítására, de magabiztosan passzolgattak, így Angliának esélye sem volt a szépítésre, miután alig volt náluk a labda. Hogy teljes legyen az angol KO, arról Gazdag Dániel gondoskodott, amikor egy nagy sprint után lazán átpörgette a labdát a kimozduló kapus felett.
43
- Ha Anglia nem is veszi komolyan ezt a sorozatot, mert a novemberi vb-re készül, és egy fárasztó szezon végén már a legszívesebben pihennének a klasszisai, négy gólt biztosan nem akart kapni, mert így könnyen ki is eshetnek az A divízióból. Tavaly szeptemberben Eb-selejtezőn Anglia ugyanilyen arányban verte a mieinket a Puskás Arénában, ez most egy méltó visszavágás volt.
44
- A csoport másik meccsén a németek 5-2-vel küldték haza az olaszokat, és ezzel feljöttek a második helyre.""",
45
- """A megszokott menetrenden kívül és váratlanul nagy mértékben emelt a jegybank az irányadó rátán. A forint a hírre 395 alá erősödött az euróval szemben.
46
- 7,25 százalékos kamattal hirdette meg a Magyar Nemzeti Bank (MNB) az egyhetes betéti tenderét – derül ki a jegybank által közzétett adatokból. Az egyhetes betét azt jelenti, hogy a bankok ezzel a rátával parkoltathatják a pénzüket egy hétig az MNB-ben. Mivel az egyhetes betéti eszköz kamata jelenleg magasabb, mint az alapkamat, valójában ez az irányadó ráta.
47
- A kamatemelés váratlan, mind időzítését, mind mértékét tekintve. A jegybank ugyan kommunikációja szerint nyitva tartja a lehetőségét, hogy bármikor emeljen az egyhetes betét kamatán, azon a Monetáris Tanács havi kamatdöntő ülései után szokott emelni. A cél az, hogy idővel az alapkamat és az egyhetes betét kamata újra összezárjon. A kamatemelés így a bevett menetrenden kívüli. A mértéke is nagyobb a megszokottnál, az emelés 0,5 százalékpontos, miközben a jegybank egy ideje 0,3 százalékpontos lépésekkel haladt felfelé. Legutóbb márciusban volt 0,5 százalékpontos emelés, ekkor a ráta 5,35 százalékról 5,85 százalékra nőtt.
48
- A Monetáris Tanács legutóbb május végén emelt az alapkamat szintjén, 5,4-ről 5,9 százalékra. Addig kell a kamatokat emelni, amíg az szükséges, hogy az inflációs célt fenntartható módon el tudjuk érni – mondta Virág Barnabás alelnök az alapkamat-emelés után. Az alelnök szerint a következő hónapokban várhatóan még tovább nő az infláció az áprilisi 9,5 százalékról, Virág elhúzódó infláció elleni harcot, a szigorúbb monetáris kondíciók tartós fenntartását ígérte.
49
- A forint árfolyama mindenesetre jól reagált az egyhetesbetétkamat-emelésre, az euróval szemben a napi nyitó árfolyam 397,5 környékéről 395 alá erősödött. A megelőző napokban a forint sorra döntögette a negatív árfolyamrekordokat, az euróval szemben többször 400 fölött is járt. Jelenleg a történelmi mélypont 402,96.
50
- """,
51
- """Jövőre nem lesz "ledolgozós" hétvége
52
- 2023-ban egyetlen szombati munkanap sem lesz.
53
- Jövőre kétszer (húsvétkor és karácsonykor) lesz négynapos a hétvége. Emellett négyszer lesz háromnapos hosszú hétvége, mivel május 1-én, pünkösdkor, október 23-án, és az újévkor is egy-egy hétfővel bővülnek majd a hétvégi szabadnapok - ezt Koncz Zsófia, a Technológiai és Ipari Minisztérium új parlamenti államtitkára közölte egy Facebook-bejegyzésben, hangsúlyozva, hogy a munkarendet meghatározó minisztériumuk úgy döntött, hogy 2023-ban egyetlen egynapos, "ledolgozós” hétvége sem lesz.
54
- Hogy mennyit ér egy munkanap, arról csak becsléseket lehet készíteni, és a statisztikusok legszívesebben erről is lebeszélnék a kísérletező kedvűeket. Nagyon leegyszerűsítve mondhatjuk azt: a GDP-t leosztva a munkanapok számával 160 milliárd forintot ér egy munkanap, de akkor még azt sem vettük figyelembe, hogy van munka azért hétvégéken is.
55
- """
56
- ]
 
5
  import spacy
6
  from spacy import Language
7
 
8
+ NLP: Language = spacy.load("hu_core_news_trf")
9
 
10
 
11
  def _compute_idf(freq_file: Path) -> Dict[str, float]:
 
26
 
27
 
28
  IDF: Dict[str, float] = _compute_idf(Path(__file__).parent.parent / "resources" / "freq.list")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/keyphrases.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
4
  import pandas as pd
5
  from textacy.extract.keyterms.sgrank import sgrank as keywords
6
 
7
- from examples.common import NLP, IDF, NEWS_EXAMPLES
8
 
9
 
10
  def process(text: str) -> pd.DataFrame:
@@ -16,10 +16,38 @@ def process(text: str) -> pd.DataFrame:
16
  if all(other == term or term not in other for other in term_set)])
17
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  demo = gr.Interface(
20
  fn=process,
21
- inputs=gr.Textbox(value=NEWS_EXAMPLES[0], lines=10, label="Input text", show_label=True),
22
  outputs=gr.DataFrame(label="Keywords", show_label=False, max_cols=2, max_rows=10),
23
- examples=NEWS_EXAMPLES,
24
  # cache_examples=True,
25
  )
 
4
  import pandas as pd
5
  from textacy.extract.keyterms.sgrank import sgrank as keywords
6
 
7
+ from examples.common import NLP, IDF
8
 
9
 
10
  def process(text: str) -> pd.DataFrame:
 
16
  if all(other == term or term not in other for other in term_set)])
17
 
18
 
19
+ EXAMPLES = [
20
+ """A magyar futballválogatott negyedik Nemzetek Ligája mérkőzésén másodszor nyert, a hazai 1-0-s siker után idegenben 4-0-val megsemmisítette Angliát. Nagy győzelem volt, az enervált angolokat a saját közönségük fütyülte ki, miután a második félidőben el sem találták a kaput. 96 éve nem kaptak ekkora verést az angolok hazai pályán.
21
+ Az angol kapitány, Gareth Southgate kilenc helyen változtatott azon a csapaton, amelyik az olaszok ellen gól nélküli döntetlent játszott szombaton. Marco Rossi a kapuban cserélt, Dibusz Dénes állt a gólvonalon, Schäfer András visszatért a középpályára, miután a németek ellen letöltötte eltiltását. A 3-4-2-1-es felállás ezúttal sem változott. Ha végig akarja nézni helyszíni közvetítésünket, mit műveltek az angolok a Himnusz alatt, itt megteheti.
22
+ Az első helyzetre a hatodik percig kellett várni, akkor Kane passzából James húzott el a bal oldalon, középre adta a labdát, Bowen fejelt, Nagy Zsolt a helyén volt, és mentett.
23
+ Az első magyar lövés rögtön a kapuban landolt.
24
+ Szoboszlai ívelt be szabadrúgást a tizenhatoson belülre, Lang és Stone ugrott fel fejelni, a magyar védő megelőzte ellenfelét. A labda Kane talpa alatt elcsúszott, Sallai combbal átvette, azonnal lőtt 7 méterről, mielőtt még Phillips odaért volna, Ramsdale ugyan beleért, de kiütni már nem tudta. Szoboszlainak volt egy másik, sokkal közelebbről elvégzett szabadrúgása, amit igen veszélyesen lőtt be a kapu elé, James a gólvonalról mentett, Szalai Attila elől. A kipattanóból az angolok egy gyors kontrát vezettek, de a magyar tízes visszafutott, és a tizenhatoson belül szerelni tudott.
25
+ A magyar válogatott nem volt nagy nyomás alatt, az angolok akkor jártak legközelebb a gólhoz, amikor Orbán a 36. percben a saját kapuja felé fejelt, de Dibusz akkor is a helyén volt. A hazai szögletek veszélyesek voltak, de egyik sem annyira, hogy a szívünkhöz kellett volna kapnunk.
26
+ A második félidőben az angolok felgyorsították a játékukat, de igazán komoly helyzetet nem tudtak kialakítani, Rossi pedig már az 55. percben érezte, hogy frissíteni kell, és Szoboszlai helyére Gazdag, Styles helyére pedig Nagy Ádám állt.
27
+ Mivel a válogatott visszaállt, és fegyelmezetten zárta le a területeket, az angolok ötlettelen, olykor lassú adogatása veszélytelen volt. A csapat ezúttal is igazolta, mennyire képes megnehezíteni, megkeseríteni a riválisai dolgát.
28
+ A hajrában jött a varázslat Most azonban azt is igazolta, hogy egy pillanat alatt a kapu elé tud kerülni. A Szalai helyére beálló Ádám Martin megharcolt egy labdáért, megtartotta a térfél közepén, majd tökéletesen szöktette Sallait, aki a tizenhatoson belül állítgatás nélkül jobb külsővel elrúgta a labdát Ramsdale lába mellett.
29
+ A 77. percben Kane használható labdát kapott a szélről, kilenc méterről fejelt, a kapufáról pattant vissza a labda a mezőnybe, a center megpróbálta átvenni, de másodszorra már nem tudta, így odalett a helyzet. A 81. percben Magyarország berúgta a harmadik gólt.
30
+ Nego fejesét még hárították a védők, a kipattanót készítette le Ádám Martin a támadást kísérő Nagy Zsolt elé, aki 17 méterről külsővel, állítgatás nélkül, pazarul lőtte ki a jobb alsó sarkot. A 29 éves felcsúti védő ennek a 11 napnak a legnagyobb felfedezettje, mert a németek ellen is eredményes volt szombaton.
31
+ Stonest a hajrában még kiállították, a mieink nem törekedtek még jobban a gólkülönbség javítására, de magabiztosan passzolgattak, így Angliának esélye sem volt a szépítésre, miután alig volt náluk a labda. Hogy teljes legyen az angol KO, arról Gazdag Dániel gondoskodott, amikor egy nagy sprint után lazán átpörgette a labdát a kimozduló kapus felett.
32
+ Ha Anglia nem is veszi komolyan ezt a sorozatot, mert a novemberi vb-re készül, és egy fárasztó szezon végén már a legszívesebben pihennének a klasszisai, négy gólt biztosan nem akart kapni, mert így könnyen ki is eshetnek az A divízióból. Tavaly szeptemberben Eb-selejtezőn Anglia ugyanilyen arányban verte a mieinket a Puskás Arénában, ez most egy méltó visszavágás volt.
33
+ A csoport másik meccsén a németek 5-2-vel küldték haza az olaszokat, és ezzel feljöttek a második helyre.""",
34
+ """A megszokott menetrenden kívül és váratlanul nagy mértékben emelt a jegybank az irányadó rátán. A forint a hírre 395 alá erősödött az euróval szemben.
35
+ 7,25 százalékos kamattal hirdette meg a Magyar Nemzeti Bank (MNB) az egyhetes betéti tenderét – derül ki a jegybank által közzétett adatokból. Az egyhetes betét azt jelenti, hogy a bankok ezzel a rátával parkoltathatják a pénzüket egy hétig az MNB-ben. Mivel az egyhetes betéti eszköz kamata jelenleg magasabb, mint az alapkamat, valójában ez az irányadó ráta.
36
+ A kamatemelés váratlan, mind időzítését, mind mértékét tekintve. A jegybank ugyan kommunikációja szerint nyitva tartja a lehetőségét, hogy bármikor emeljen az egyhetes betét kamatán, azon a Monetáris Tanács havi kamatdöntő ülései után szokott emelni. A cél az, hogy idővel az alapkamat és az egyhetes betét kamata újra összezárjon. A kamatemelés így a bevett menetrenden kívüli. A mértéke is nagyobb a megszokottnál, az emelés 0,5 százalékpontos, miközben a jegybank egy ideje 0,3 százalékpontos lépésekkel haladt felfelé. Legutóbb márciusban volt 0,5 százalékpontos emelés, ekkor a ráta 5,35 százalékról 5,85 százalékra nőtt.
37
+ A Monetáris Tanács legutóbb május végén emelt az alapkamat szintjén, 5,4-ről 5,9 százalékra. Addig kell a kamatokat emelni, amíg az szükséges, hogy az inflációs célt fenntartható módon el tudjuk érni – mondta Virág Barnabás alelnök az alapkamat-emelés után. Az alelnök szerint a következő hónapokban várhatóan még tovább nő az infláció az áprilisi 9,5 százalékról, Virág elhúzódó infláció elleni harcot, a szigorúbb monetáris kondíciók tartós fenntartását ígérte.
38
+ A forint árfolyama mindenesetre jól reagált az egyhetesbetétkamat-emelésre, az euróval szemben a napi nyitó árfolyam 397,5 környékéről 395 alá erősödött. A megelőző napokban a forint sorra döntögette a negatív árfolyamrekordokat, az euróval szemben többször 400 fölött is járt. Jelenleg a történelmi mélypont 402,96.
39
+ """,
40
+ """Jövőre nem lesz "ledolgozós" hétvége
41
+ 2023-ban egyetlen szombati munkanap sem lesz.
42
+ Jövőre kétszer (húsvétkor és karácsonykor) lesz négynapos a hétvége. Emellett négyszer lesz háromnapos hosszú hétvége, mivel május 1-én, pünkösdkor, október 23-án, és az újévkor is egy-egy hétfővel bővülnek majd a hétvégi szabadnapok - ezt Koncz Zsófia, a Technológiai és Ipari Minisztérium új parlamenti államtitkára közölte egy Facebook-bejegyzésben, hangsúlyozva, hogy a munkarendet meghatározó minisztériumuk úgy döntött, hogy 2023-ban egyetlen egynapos, "ledolgozós” hétvége sem lesz.
43
+ Hogy mennyit ér egy munkanap, arról csak becsléseket lehet készíteni, és a statisztikusok legszívesebben erről is lebeszélnék a kísérletező kedvűeket. Nagyon leegyszerűsítve mondhatjuk azt: a GDP-t leosztva a munkanapok számával 160 milliárd forintot ér egy munkanap, de akkor még azt sem vettük figyelembe, hogy van munka azért hétvégéken is.
44
+ """
45
+ ]
46
+
47
  demo = gr.Interface(
48
  fn=process,
49
+ inputs=gr.Textbox(value=EXAMPLES[0], lines=10, label="Input text", show_label=True),
50
  outputs=gr.DataFrame(label="Keywords", show_label=False, max_cols=2, max_rows=10),
51
+ examples=EXAMPLES,
52
  # cache_examples=True,
53
  )
examples/relation.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ from examples.common import NLP
5
+ from resources import triples
6
+
7
+
8
def process(text: str) -> pd.DataFrame:
    """Extract the first subject-verb-object relation found in ``text``.

    The text is parsed with the shared ``NLP`` pipeline and passed to
    ``triples.subject_verb_object_triples``. Only the first triple is
    reported; each of its three parts is rendered as a comma-separated
    string of its elements.

    Args:
        text: Raw input text to analyse.

    Returns:
        A single-row DataFrame with the columns ``Subject``, ``Verb`` and
        ``Object``. The row contains ``"-"`` placeholders when no triple
        is found.
    """
    columns = ['Subject', 'Verb', 'Object']
    doc = NLP(text)

    # Only the first triple is displayed, so take one item lazily; the
    # `or ()` keeps a None/empty result from the helper harmless.
    found = triples.subject_verb_object_triples(doc)
    first = next(iter(found or ()), None)

    if first is None:
        return pd.DataFrame([["-", "-", "-"]], columns=columns)

    def _joined(part) -> str:
        # Render every element of one triple part, separated by ", ".
        return ", ".join(str(item) for item in part)

    # Index explicitly (subject, verb, object) rather than unpacking, so a
    # triple type with extra trailing fields keeps working.
    row = [_joined(first[0]), _joined(first[1]), _joined(first[2])]

    return pd.DataFrame([row], columns=columns)
36
+
37
+
38
+ EXAMPLES = [
39
+ "Anna éppen most házat épít magának.",
40
+ "Noémi gulyáslevest szeret főzni, ha éhes.",
41
+ "Balázs jéghideg helyi ananászlevet ivott Hawaii fehér homokos partján.",
42
+ "Júliska fagyit árul a nyáron teljes állásban.",
43
+ "Einstein megmutatta a házát építés közben.",
44
+ "Hawking nyilatkozott egy levelet, miszerint a felfedezései az élete legizgalmasabb eseményei voltak."
45
+ ]
46
+
47
# Gradio interface for the relation-extraction tab: one text input,
# one table output, with the example sentences offered as presets.
demo = gr.Interface(
    fn=process,
    inputs=gr.Textbox(value=EXAMPLES[0], lines=10, label="Input text", show_label=True),
    # The table holds a single row because process() reports only the first
    # triple; the label names relations (it previously said "Keywords").
    outputs=gr.DataFrame(label="Relations", show_label=False, max_cols=3, max_rows=1),
    examples=EXAMPLES,
    # cache_examples=True,
)
poetry.lock CHANGED
@@ -60,8 +60,8 @@ idna = ">=2.8"
60
  sniffio = ">=1.1"
61
 
62
  [package.extras]
63
- test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"]
64
  doc = ["packaging", "sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"]
 
65
  trio = ["trio (>=0.16)"]
66
 
67
  [[package]]
@@ -92,10 +92,10 @@ optional = false
92
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
93
 
94
  [package.extras]
 
95
  docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
96
  tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
97
  tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
98
- dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
99
 
100
  [[package]]
101
  name = "backoff"
@@ -117,8 +117,8 @@ python-versions = ">=3.6"
117
  cffi = ">=1.1"
118
 
119
  [package.extras]
120
- typecheck = ["mypy"]
121
  tests = ["pytest (>=3.2.1,!=3.3.0)"]
 
122
 
123
  [[package]]
124
  name = "beautifulsoup4"
@@ -132,8 +132,8 @@ python-versions = ">=3.6.0"
132
  soupsieve = ">1.2"
133
 
134
  [package.extras]
135
- lxml = ["lxml"]
136
  html5lib = ["html5lib"]
 
137
 
138
  [[package]]
139
  name = "blis"
@@ -194,11 +194,11 @@ unicode_backport = ["unicodedata2"]
194
 
195
  [[package]]
196
  name = "click"
197
- version = "8.1.3"
198
  description = "Composable command line interface toolkit"
199
  category = "main"
200
  optional = false
201
- python-versions = ">=3.7"
202
 
203
  [package.dependencies]
204
  colorama = {version = "*", markers = "platform_system == \"Windows\""}
@@ -223,11 +223,11 @@ python-versions = ">=3.6"
223
  cffi = ">=1.12"
224
 
225
  [package.extras]
226
- pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"]
227
  docstest = ["pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"]
 
228
  sdist = ["setuptools_rust (>=0.11.4)"]
229
  ssh = ["bcrypt (>=3.1.5)"]
230
- docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"]
231
  test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"]
232
 
233
  [[package]]
@@ -292,10 +292,10 @@ pydantic = ">=1.6.2,<1.7 || >1.7,<1.7.1 || >1.7.1,<1.7.2 || >1.7.2,<1.7.3 || >1.
292
  starlette = "0.19.1"
293
 
294
  [package.extras]
295
- test = ["pytest (>=6.2.4,<7.0.0)", "pytest-cov (>=2.12.0,<4.0.0)", "mypy (==0.910)", "flake8 (>=3.8.3,<4.0.0)", "black (==22.3.0)", "isort (>=5.0.6,<6.0.0)", "requests (>=2.24.0,<3.0.0)", "httpx (>=0.14.0,<0.19.0)", "email_validator (>=1.1.1,<2.0.0)", "sqlalchemy (>=1.3.18,<1.5.0)", "peewee (>=3.13.3,<4.0.0)", "databases[sqlite] (>=0.3.2,<0.6.0)", "orjson (>=3.2.1,<4.0.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "flask (>=1.1.2,<3.0.0)", "anyio[trio] (>=3.2.1,<4.0.0)", "types-ujson (==4.2.1)", "types-orjson (==3.6.2)", "types-dataclasses (==0.6.5)"]
296
- doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "typer (>=0.4.1,<0.5.0)", "pyyaml (>=5.3.1,<7.0.0)"]
297
  all = ["requests (>=2.24.0,<3.0.0)", "jinja2 (>=2.11.2,<4.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "itsdangerous (>=1.1.0,<3.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)", "orjson (>=3.2.1,<4.0.0)", "email_validator (>=1.1.1,<2.0.0)", "uvicorn[standard] (>=0.12.0,<0.18.0)"]
298
  dev = ["python-jose[cryptography] (>=3.3.0,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "autoflake (>=1.4.0,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "uvicorn[standard] (>=0.12.0,<0.18.0)", "pre-commit (>=2.17.0,<3.0.0)"]
 
 
299
 
300
  [[package]]
301
  name = "feedfinder2"
@@ -350,18 +350,18 @@ optional = false
350
  python-versions = ">=3.7"
351
 
352
  [package.extras]
353
- plot = ["matplotlib"]
354
  all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "skia-pathops (>=0.5.0)", "uharfbuzz (>=0.23.0)", "brotlicffi (>=0.8.0)", "scipy", "brotli (>=1.0.1)", "munkres", "unicodedata2 (>=14.0.0)", "xattr"]
355
- lxml = ["lxml (>=4.0,<5)"]
356
  interpolatable = ["scipy", "munkres"]
357
- ufo = ["fs (>=2.2.0,<3)"]
358
  pathops = ["skia-pathops (>=0.5.0)"]
359
- woff = ["zopfli (>=0.1.4)", "brotlicffi (>=0.8.0)", "brotli (>=1.0.1)"]
360
- symfont = ["sympy"]
361
- graphite = ["lz4 (>=1.7.4.2)"]
362
- unicode = ["unicodedata2 (>=14.0.0)"]
363
  repacker = ["uharfbuzz (>=0.23.0)"]
 
364
  type1 = ["xattr"]
 
 
 
365
 
366
  [[package]]
367
  name = "frozenlist"
@@ -380,27 +380,27 @@ optional = false
380
  python-versions = ">=3.7"
381
 
382
  [package.extras]
383
- oci = ["ocifs"]
384
- hdfs = ["pyarrow (>=1)"]
 
 
 
385
  entrypoints = ["importlib-metadata"]
 
 
386
  git = ["pygit2"]
 
387
  gs = ["gcsfs"]
388
- s3 = ["s3fs"]
389
  gui = ["panel"]
390
- sftp = ["paramiko"]
391
- github = ["requests"]
392
- gcs = ["gcsfs"]
393
- fuse = ["fusepy"]
394
- tqdm = ["tqdm"]
395
- ssh = ["paramiko"]
396
- arrow = ["pyarrow (>=1)"]
397
- dropbox = ["dropboxdrivefs", "requests", "dropbox"]
398
- abfs = ["adlfs"]
399
  http = ["requests", "aiohttp"]
400
- adl = ["adlfs"]
401
  libarchive = ["libarchive-c"]
402
- dask = ["dask", "distributed"]
 
 
403
  smb = ["smbprotocol"]
 
 
404
 
405
  [[package]]
406
  name = "gradio"
@@ -439,19 +439,46 @@ optional = false
439
  python-versions = ">=3.6"
440
 
441
  [[package]]
442
- name = "hu-core-news-lg"
443
- version = "3.3.0"
444
- description = "Core Hungarian model for HuSpaCy. Components: tok2vec, senter, tagger, morphologizer, lemmatizer, parser, ner"
445
  category = "main"
446
  optional = false
447
  python-versions = "*"
448
 
449
  [package.dependencies]
450
- spacy = ">=3.3.0,<3.4.0"
 
 
451
 
452
  [package.source]
453
  type = "url"
454
- url = "https://huggingface.co/huspacy/hu_core_news_lg/resolve/v3.3.0/hu_core_news_lg-any-py3-none-any.whl"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
  [[package]]
456
  name = "idna"
457
  version = "3.3"
@@ -529,10 +556,10 @@ python-versions = ">=3.6"
529
  uc-micro-py = "*"
530
 
531
  [package.extras]
532
- test = ["coverage", "pytest", "pytest-cov"]
533
- doc = ["sphinx", "sphinx-book-theme", "myst-parser"]
534
  benchmark = ["pytest", "pytest-benchmark"]
535
  dev = ["pre-commit", "isort", "flake8", "black"]
 
 
536
 
537
  [[package]]
538
  name = "lxml"
@@ -543,10 +570,10 @@ optional = false
543
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
544
 
545
  [package.extras]
546
- source = ["Cython (>=0.29.7)"]
547
  cssselect = ["cssselect (>=0.7)"]
548
  html5 = ["html5lib"]
549
  htmlsoup = ["beautifulsoup4"]
 
550
 
551
  [[package]]
552
  name = "markdown-it-py"
@@ -562,14 +589,14 @@ mdit-py-plugins = {version = "*", optional = true, markers = "extra == \"plugins
562
  mdurl = ">=0.1,<1.0"
563
 
564
  [package.extras]
565
- rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx-book-theme"]
 
566
  compare = ["commonmark (>=0.9.1,<0.10.0)", "markdown (>=3.3.6,<3.4.0)", "mistletoe (>=0.8.1,<0.9.0)", "mistune (>=2.0.2,<2.1.0)", "panflute (>=2.1.3,<2.2.0)"]
 
 
567
  profiling = ["gprof2dot"]
568
- code_style = ["pre-commit (==2.6)"]
569
  testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
570
- plugins = ["mdit-py-plugins"]
571
- linkify = ["linkify-it-py (>=1.0,<2.0)"]
572
- benchmarking = ["psutil", "pytest", "pytest-benchmark (>=3.2,<4.0)"]
573
 
574
  [[package]]
575
  name = "markupsafe"
@@ -610,8 +637,8 @@ python-versions = "~=3.6"
610
  markdown-it-py = ">=1.0.0,<3.0.0"
611
 
612
  [package.extras]
613
- rtd = ["myst-parser (>=0.14.0,<0.15.0)", "sphinx-book-theme (>=0.1.0,<0.2.0)"]
614
  code_style = ["pre-commit (==2.6)"]
 
615
  testing = ["coverage", "pytest (>=3.6,<4)", "pytest-cov", "pytest-regressions"]
616
 
617
  [[package]]
@@ -656,10 +683,10 @@ python-versions = ">=3.8"
656
 
657
  [package.extras]
658
  default = ["numpy (>=1.19)", "scipy (>=1.8)", "matplotlib (>=3.4)", "pandas (>=1.3)"]
659
- doc = ["sphinx (>=5)", "pydata-sphinx-theme (>=0.9)", "sphinx-gallery (>=0.10)", "numpydoc (>=1.4)", "pillow (>=9.1)", "nb2plots (>=0.6)", "texext (>=0.6.6)"]
660
- test = ["pytest (>=7.1)", "pytest-cov (>=3.0)", "codecov (>=2.1)"]
661
  developer = ["pre-commit (>=2.19)", "mypy (>=0.960)"]
 
662
  extra = ["lxml (>=4.6)", "pygraphviz (>=1.9)", "pydot (>=1.4.2)", "sympy (>=1.10)"]
 
663
 
664
  [[package]]
665
  name = "newspaper3k"
@@ -699,12 +726,12 @@ regex = ">=2021.8.3"
699
  tqdm = "*"
700
 
701
  [package.extras]
702
- plot = ["matplotlib"]
703
  all = ["numpy", "pyparsing", "scipy", "matplotlib", "twython", "requests", "scikit-learn", "python-crfsuite"]
704
- twitter = ["twython"]
705
- tgrep = ["pyparsing"]
706
- machine_learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"]
707
  corenlp = ["requests"]
 
 
 
 
708
 
709
  [[package]]
710
  name = "numpy"
@@ -768,8 +795,8 @@ pynacl = ">=1.0.1"
768
  six = "*"
769
 
770
  [package.extras]
771
- ed25519 = ["pynacl (>=1.0.1)", "bcrypt (>=3.1.3)"]
772
  all = ["pyasn1 (>=0.1.7)", "pynacl (>=1.0.1)", "bcrypt (>=3.1.3)", "invoke (>=1.3)", "gssapi (>=1.4.1)", "pywin32 (>=2.1.8)"]
 
773
  gssapi = ["pyasn1 (>=0.1.7)", "gssapi (>=1.4.1)", "pywin32 (>=2.1.8)"]
774
  invoke = ["invoke (>=1.3)"]
775
 
@@ -786,10 +813,10 @@ smart-open = ">=5.0.0,<6.0.0"
786
  typer = ">=0.3.0,<1.0.0"
787
 
788
  [package.extras]
789
- s3 = ["boto3"]
790
- test = ["pytest", "pytest-coverage", "mock", "typer-cli"]
791
  all = ["google-cloud-storage (>=1.26.0,<2.0.0)", "boto3", "pytest", "pytest-coverage", "mock", "typer-cli"]
792
  gcs = ["google-cloud-storage (>=1.26.0,<2.0.0)"]
 
 
793
 
794
  [[package]]
795
  name = "phonenumbers"
@@ -923,8 +950,8 @@ optional = false
923
  python-versions = ">=3.7"
924
 
925
  [package.extras]
926
- test = ["pytest", "pytest-cov", "pytest-flake8", "pytest-isort", "coverage"]
927
  doc = ["sphinx", "sphinx-rtd-theme"]
 
928
 
929
  [[package]]
930
  name = "python-dateutil"
@@ -987,8 +1014,8 @@ idna = ">=2.5,<4"
987
  urllib3 = ">=1.21.1,<1.27"
988
 
989
  [package.extras]
990
- use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
991
  socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 
992
 
993
  [[package]]
994
  name = "requests-file"
@@ -1017,10 +1044,10 @@ scipy = ">=1.3.2"
1017
  threadpoolctl = ">=2.0.0"
1018
 
1019
  [package.extras]
1020
- docs = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)", "memory-profiler (>=0.57.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "numpydoc (>=1.2.0)", "Pillow (>=7.1.2)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"]
1021
- tests = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "flake8 (>=3.8.2)", "black (>=22.3.0)", "mypy (>=0.770)", "pyamg (>=4.0.0)", "numpydoc (>=1.2.0)"]
1022
  benchmark = ["matplotlib (>=3.1.2)", "pandas (>=1.0.5)", "memory-profiler (>=0.57.0)"]
 
1023
  examples = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)"]
 
1024
 
1025
  [[package]]
1026
  name = "scipy"
@@ -1075,12 +1102,12 @@ python-versions = ">=3.6,<4.0"
1075
 
1076
  [package.extras]
1077
  all = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests"]
1078
- http = ["requests"]
1079
- s3 = ["boto3"]
1080
- webhdfs = ["requests"]
1081
  azure = ["azure-storage-blob", "azure-common", "azure-core"]
1082
  gcs = ["google-cloud-storage"]
 
 
1083
  test = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests", "moto[server] (==1.3.14)", "pathlib2", "responses", "paramiko", "parameterizedtestcase", "pytest", "pytest-rerunfailures"]
 
1084
 
1085
  [[package]]
1086
  name = "sniffio"
@@ -1100,7 +1127,7 @@ python-versions = ">=3.6"
1100
 
1101
  [[package]]
1102
  name = "spacy"
1103
- version = "3.3.1"
1104
  description = "Industrial-strength Natural Language Processing (NLP) in Python"
1105
  category = "main"
1106
  optional = false
@@ -1109,6 +1136,7 @@ python-versions = ">=3.6"
1109
  [package.dependencies]
1110
  blis = ">=0.4.0,<0.8.0"
1111
  catalogue = ">=2.0.6,<2.1.0"
 
1112
  cymem = ">=2.0.2,<2.1.0"
1113
  jinja2 = "*"
1114
  langcodes = ">=3.2.0,<4.0.0"
@@ -1119,36 +1147,58 @@ pathy = ">=0.3.5"
1119
  preshed = ">=3.0.2,<3.1.0"
1120
  pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0"
1121
  requests = ">=2.13.0,<3.0.0"
1122
- spacy-legacy = ">=3.0.9,<3.1.0"
1123
  spacy-loggers = ">=1.0.0,<2.0.0"
1124
- srsly = ">=2.4.3,<3.0.0"
1125
- thinc = ">=8.0.14,<8.1.0"
1126
  tqdm = ">=4.38.0,<5.0.0"
1127
  typer = ">=0.3.0,<0.5.0"
1128
- wasabi = ">=0.9.1,<1.1.0"
1129
 
1130
  [package.extras]
1131
- lookups = ["spacy-lookups-data (>=1.0.3,<1.1.0)"]
1132
- transformers = ["spacy-transformers (>=1.1.2,<1.2.0)"]
1133
  apple = ["thinc-apple-ops (>=0.0.4,<1.0.0)"]
1134
- cuda90 = ["cupy-cuda90 (>=5.0.0b4,<11.0.0)"]
1135
- cuda91 = ["cupy-cuda91 (>=5.0.0b4,<11.0.0)"]
1136
- cuda92 = ["cupy-cuda92 (>=5.0.0b4,<11.0.0)"]
1137
- cuda115 = ["cupy-cuda115 (>=5.0.0b4,<11.0.0)"]
1138
- ko = ["natto-py (==0.9.0)"]
1139
  cuda102 = ["cupy-cuda102 (>=5.0.0b4,<11.0.0)"]
1140
- ja = ["sudachipy (>=0.5.2,!=0.6.1)", "sudachidict-core (>=20211220)"]
1141
- cuda112 = ["cupy-cuda112 (>=5.0.0b4,<11.0.0)"]
1142
- cuda113 = ["cupy-cuda113 (>=5.0.0b4,<11.0.0)"]
1143
  cuda110 = ["cupy-cuda110 (>=5.0.0b4,<11.0.0)"]
1144
  cuda111 = ["cupy-cuda111 (>=5.0.0b4,<11.0.0)"]
1145
- cuda101 = ["cupy-cuda101 (>=5.0.0b4,<11.0.0)"]
1146
- cuda100 = ["cupy-cuda100 (>=5.0.0b4,<11.0.0)"]
1147
- th = ["pythainlp (>=2.0)"]
1148
- cuda80 = ["cupy-cuda80 (>=5.0.0b4,<11.0.0)"]
1149
- cuda = ["cupy (>=5.0.0b4,<11.0.0)"]
1150
  cuda114 = ["cupy-cuda114 (>=5.0.0b4,<11.0.0)"]
 
 
 
 
 
 
 
 
1151
  ray = ["spacy-ray (>=0.1.0,<1.0.0)"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1152
 
1153
  [[package]]
1154
  name = "spacy-legacy"
@@ -1169,6 +1219,34 @@ python-versions = ">=3.6"
1169
  [package.dependencies]
1170
  wasabi = ">=0.8.1,<1.1.0"
1171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1172
  [[package]]
1173
  name = "srsly"
1174
  version = "2.4.3"
@@ -1219,11 +1297,11 @@ spacy = ">=3.0.0"
1219
  tqdm = ">=4.19.6"
1220
 
1221
  [package.extras]
1222
- viz = ["matplotlib (>=3.0.0)"]
1223
- docs = ["recommonmark (>=0.6.0,<0.7.0)", "sphinx (>=3.0.0,<4.0.0)"]
1224
  build_and_test = ["build", "pytest (>=6.0,<7.0)", "pytest-cov", "twine (>=3.0.0)", "wheel"]
1225
  dev = ["black", "build", "flake8 (>=3.8.0)", "mypy (>=0.900)", "recommonmark (>=0.6.0,<0.7.0)", "sphinx (>=3.0.0,<4.0.0)", "pytest (>=6.0,<7.0)", "pytest-cov", "twine (>=3.0.0)", "wheel"]
 
1226
  lint_and_format = ["black", "flake8 (>=3.8.0)", "mypy (>=0.900)"]
 
1227
 
1228
  [[package]]
1229
  name = "thinc"
@@ -1245,24 +1323,24 @@ srsly = ">=2.4.0,<3.0.0"
1245
  wasabi = ">=0.8.1,<1.1.0"
1246
 
1247
  [package.extras]
1248
- datasets = ["ml-datasets (>=0.2.0,<0.3.0)"]
1249
- mxnet = ["mxnet (>=1.5.1,<1.6.0)"]
1250
- cuda90 = ["cupy-cuda90 (>=5.0.0b4)"]
1251
- cuda91 = ["cupy-cuda91 (>=5.0.0b4)"]
1252
- cuda92 = ["cupy-cuda92 (>=5.0.0b4)"]
1253
- torch = ["torch (>=1.6.0)"]
1254
  cuda112 = ["cupy-cuda112 (>=5.0.0b4)"]
1255
  cuda113 = ["cupy-cuda113 (>=5.0.0b4)"]
1256
- cuda110 = ["cupy-cuda110 (>=5.0.0b4)"]
1257
- cuda115 = ["cupy-cuda115 (>=5.0.0b4)"]
1258
- cuda101 = ["cupy-cuda101 (>=5.0.0b4)"]
1259
- cuda = ["cupy (>=5.0.0b4)"]
1260
  cuda114 = ["cupy-cuda114 (>=5.0.0b4)"]
 
1261
  cuda80 = ["cupy-cuda80 (>=5.0.0b4)"]
 
 
 
 
 
1262
  tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"]
1263
- cuda111 = ["cupy-cuda111 (>=5.0.0b4)"]
1264
- cuda102 = ["cupy-cuda102 (>=5.0.0b4)"]
1265
- cuda100 = ["cupy-cuda100 (>=5.0.0b4)"]
1266
 
1267
  [[package]]
1268
  name = "threadpoolctl"
@@ -1294,6 +1372,18 @@ idna = "*"
1294
  requests = ">=2.1.0"
1295
  requests-file = ">=1.4"
1296
 
 
 
 
 
 
 
 
 
 
 
 
 
1297
  [[package]]
1298
  name = "tomli"
1299
  version = "2.0.1"
@@ -1310,6 +1400,17 @@ category = "main"
1310
  optional = false
1311
  python-versions = ">=3.5"
1312
 
 
 
 
 
 
 
 
 
 
 
 
1313
  [[package]]
1314
  name = "tqdm"
1315
  version = "4.64.0"
@@ -1322,10 +1423,70 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7"
1322
  colorama = {version = "*", markers = "platform_system == \"Windows\""}
1323
 
1324
  [package.extras]
1325
- telegram = ["requests"]
1326
  notebook = ["ipywidgets (>=6)"]
1327
  slack = ["slack-sdk"]
1328
- dev = ["py-make (>=0.1.0)", "twine", "wheel"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1329
 
1330
  [[package]]
1331
  name = "typer"
@@ -1339,10 +1500,10 @@ python-versions = ">=3.6"
1339
  click = ">=7.1.1,<9.0.0"
1340
 
1341
  [package.extras]
1342
- test = ["shellingham (>=1.3.0,<2.0.0)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "coverage (>=5.2,<6.0)", "pytest-xdist (>=1.32.0,<2.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "mypy (==0.910)", "black (>=22.3.0,<23.0.0)", "isort (>=5.0.6,<6.0.0)"]
1343
- doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)"]
1344
  all = ["colorama (>=0.4.3,<0.5.0)", "shellingham (>=1.3.0,<2.0.0)"]
1345
  dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)"]
 
 
1346
 
1347
  [[package]]
1348
  name = "typing-extensions"
@@ -1415,7 +1576,7 @@ multidict = ">=4.0"
1415
  [metadata]
1416
  lock-version = "1.1"
1417
  python-versions = "~3.8"
1418
- content-hash = "f2bf888b325ef9d3f418a6cdcbd951892aeebf16eff5cbad5fc73088a0b3b36f"
1419
 
1420
  [metadata.files]
1421
  aiohttp = [
@@ -1627,8 +1788,8 @@ charset-normalizer = [
1627
  {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
1628
  ]
1629
  click = [
1630
- {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
1631
- {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
1632
  ]
1633
  colorama = [
1634
  {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
@@ -1668,7 +1829,6 @@ cycler = [
1668
  ]
1669
  cymem = [
1670
  {file = "cymem-2.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:700540b68e96a7056d0691d467df2bbaaf0934a3e6fe2383669998cbee19580a"},
1671
- {file = "cymem-2.0.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a261f51796a2705f3900ed22b8442519a0f230f50a816fb5bd89cb9b027dc5ac"},
1672
  {file = "cymem-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:971cf0a8437dfb4185c3049c086e463612fe849efadc0f5cc153fc81c501da7d"},
1673
  {file = "cymem-2.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:6b0d1a6b0a1296f31fa9e4b7ae5ea49394084ecc883b1ae6fec4844403c43468"},
1674
  {file = "cymem-2.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b8e1c18bb00800425576710468299153caad20c64ddb6819d40a6a34e21ee21c"},
@@ -1678,11 +1838,9 @@ cymem = [
1678
  {file = "cymem-2.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd52d8a81881804625df88453611175ab7e0099b34f52204da1f6940cf2e83c9"},
1679
  {file = "cymem-2.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:4749f220e4c06ec44eb10de13794ff0508cdc4f8eff656cf49cab2cdb3122c0c"},
1680
  {file = "cymem-2.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2aa3fa467d906cd2c27fa0a2e2952dd7925f5fcc7973fab6d815ef6acb25aad8"},
1681
- {file = "cymem-2.0.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:228bd261a85d92d870ed358f263ee028ac026302304f2186827377a3895c5819"},
1682
  {file = "cymem-2.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea535f74ab6024e7416f93de564e5c81fb7c0964b96280de66f60aeb05f0cf53"},
1683
  {file = "cymem-2.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:4f87fe087f2ae36c3e20e2b1a29d7f76a28c035372d0a97655f26223d975235a"},
1684
  {file = "cymem-2.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a93fba62fe79dbf6fc4d5b6d804a6e114b44af3ff3d40a28833ee39f21bd336b"},
1685
- {file = "cymem-2.0.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d631239bfb07293ee444b269656308da952b6b003b12332ccb1c624dbfcda4b"},
1686
  {file = "cymem-2.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04676d696596b0db3f3c5a3936bab12fb6f24278921a6622bb185e61765b2b4d"},
1687
  {file = "cymem-2.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:c59293b232b53ebb47427f16cf648e937022f489cff36c11d1d8a1f0075b6609"},
1688
  {file = "cymem-2.0.6.tar.gz", hash = "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e"},
@@ -1789,7 +1947,11 @@ h11 = [
1789
  {file = "h11-0.13.0-py3-none-any.whl", hash = "sha256:8ddd78563b633ca55346c8cd41ec0af27d3c79931828beffb46ce70a379e7442"},
1790
  {file = "h11-0.13.0.tar.gz", hash = "sha256:70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06"},
1791
  ]
1792
- hu-core-news-lg = []
 
 
 
 
1793
  idna = [
1794
  {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
1795
  {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
@@ -2267,7 +2429,6 @@ pillow = [
2267
  ]
2268
  preshed = [
2269
  {file = "preshed-3.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:66a71ced487516cf81fd0431a3a843514262ae2f33e9a7688b87562258fa75d5"},
2270
- {file = "preshed-3.0.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9fb3d1da40abe3d99a9ee28c0df7090c1bab7c09042421d3cade7dc12e868c70"},
2271
  {file = "preshed-3.0.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c98f725d8478f3ade4ab1ea00f50a92d2d9406d37276bc46fd8bab1d47452c4"},
2272
  {file = "preshed-3.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:ea8aa9610837e907e8442e79300df0a861bfdb4dcaf026a5d9642a688ad04815"},
2273
  {file = "preshed-3.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e03ae3eee961106a517fcd827b5a7c51f7317236b3e665c989054ab8dc381d28"},
@@ -2277,11 +2438,9 @@ preshed = [
2277
  {file = "preshed-3.0.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61b2ea656cb1c38d544cc774f1c2ad1cdab23167b46b35310a7e211d4ba9c6d0"},
2278
  {file = "preshed-3.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:87e1add41b7f6236a3ccc34788f47ab8682bc28e8a2d369089062e274494c1a0"},
2279
  {file = "preshed-3.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a279c138ad1d5be02547b1545254929588414b01571fe637016367f6a1aa11de"},
2280
- {file = "preshed-3.0.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ab8b5232255ebf7ee96e3cb4f1bedaace6ae0925d1113d4ede9d44c78f088ef2"},
2281
  {file = "preshed-3.0.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3af09f4cfcdaca085fd87dac8107617c4e2bb0ad1458f953841b71e9728287f5"},
2282
  {file = "preshed-3.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:f92e752a868ea2690e1b38c4b775251a145e0fce36b9bdd972539e8271b7a23a"},
2283
  {file = "preshed-3.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eaffbc71fdb8625f9aac4fe7e19e20bf318d1421ea05903bebe3e6ffef27b587"},
2284
- {file = "preshed-3.0.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9c7809491e26a41bd6e4e2e93ddf3e8989cff256c3829a7953b57c97a8268a6c"},
2285
  {file = "preshed-3.0.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfe1495fcfc7f479de840ddc4f426dbb55351e218ae5c8712c1269183a4d0060"},
2286
  {file = "preshed-3.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:92a8f49d17a63537a8beed48a049b62ef168ca07e0042a5b2bcdf178a1fb5d48"},
2287
  {file = "preshed-3.0.6.tar.gz", hash = "sha256:fb3b7588a3a0f2f2f1bf3fe403361b2b031212b73a37025aea1df7215af3772a"},
@@ -2575,25 +2734,64 @@ soupsieve = [
2575
  {file = "soupsieve-2.3.2.post1.tar.gz", hash = "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"},
2576
  ]
2577
  spacy = [
2578
- {file = "spacy-3.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d2e9510ad67fef328bbec01673d6e65f5670ccbb9434dfe0deaa7edffcc900a6"},
2579
- {file = "spacy-3.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:192baa97dfb90b9c59c8c5e90448284ea3810d4a9422f514f2d3eae78f97aa14"},
2580
- {file = "spacy-3.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5916b1e43b848680e01619963c2dd6ec0c1d99b5a5665c9b9b761f65c5adf18e"},
2581
- {file = "spacy-3.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:ad3c6726b92723d16c15dc7ecc48abbb6495ef7b4c30d7cf4948c1195162a272"},
2582
- {file = "spacy-3.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ee876b2036e8500b6dd315347eb2fc0345be097806bfe6857c7347beb759169a"},
2583
- {file = "spacy-3.3.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b18ed499fc8d848bb9026b294dc78128028c58a73ba0ae547d42fc6d471628a8"},
2584
- {file = "spacy-3.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:7b58cd4e27d6ae29e23b47e9ba6b8ca4e02fc69d65b57b2a9c7072c3a187b4ba"},
2585
- {file = "spacy-3.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ae06825f2e349e035fae602e2a48ba1453b8eed9238144e36e1b09a741744c6e"},
2586
- {file = "spacy-3.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fd1e3abab9df2e113d29d1bcf16700b68b125504a2b477b49eb7b6c7b5225ea"},
2587
- {file = "spacy-3.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:eea5d066f472261a37e43aa6b5002af5a9232da66a83ea5b7b590b905f00a5bb"},
2588
- {file = "spacy-3.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:dfbd15084f7b49fed1160999ef2986f2769cbcc50ab240bf026fa82337500ace"},
2589
- {file = "spacy-3.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:618f3cd983aa37bb448bdd554e1abf1d82106fc3163959786a2ca1fe01dc4f1b"},
2590
- {file = "spacy-3.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1581ba49a6f687a5e0ad8291129793af8e3c4bb02ffbc8857b34acfa7ed3049f"},
2591
- {file = "spacy-3.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:b652cc9fcd1e07f733a5de9c89fef300852d0129240f0eb11513d03bba470609"},
2592
- {file = "spacy-3.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9af33d170d8a697f50556b612be5468fdd4134759846c54415f8fc9f87ca1c25"},
2593
- {file = "spacy-3.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6971e37234395c6157a514ef1c1f15d27f91042df4e7fc8aefcc6e0d79a3cb8b"},
2594
- {file = "spacy-3.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:747a99fd2712e60325ff00b6b3131e4aef4c67b2f4433b9ffbd44833889a313d"},
2595
- {file = "spacy-3.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:787bd8546ae4feed088fb712f3baf5239197cedfe9e1ce9d8906b3e3ef2f10bc"},
2596
- {file = "spacy-3.3.1.tar.gz", hash = "sha256:7f87dbdb104d851ae6ba5fd3a76a2e14e22e048135903e98baf08571a3aa81c0"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2597
  ]
2598
  spacy-legacy = [
2599
  {file = "spacy-legacy-3.0.9.tar.gz", hash = "sha256:4f7dcbc4e6c8e8cb4eadbb009f9c0a1a2a67442e0032c8d6776c9470c3759903"},
@@ -2603,6 +2801,10 @@ spacy-loggers = [
2603
  {file = "spacy-loggers-1.0.2.tar.gz", hash = "sha256:e75d44f4cf99e6763d7132ca7c8c420e0a92790222a08bc8eb9e24ea2c13536e"},
2604
  {file = "spacy_loggers-1.0.2-py3-none-any.whl", hash = "sha256:d48c9313a577ad1818da961cf6db71a73fd1e556ae47e6e68d7e28b541d11e18"},
2605
  ]
 
 
 
 
2606
  srsly = [
2607
  {file = "srsly-2.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d0236feafe3805b384532221596e6749a54d0ff10ba022b333dc1de7aa1b2f7"},
2608
  {file = "srsly-2.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f96af9fde9f58d5923091fa723fa0fed58a83781b98e143a5d1fac5e738b9f0d"},
@@ -2664,6 +2866,41 @@ tldextract = [
2664
  {file = "tldextract-3.3.0-py3-none-any.whl", hash = "sha256:5d88321b1b528ebb8f678c72ab023f37caf6381f6af9576b4e60fd266cff178c"},
2665
  {file = "tldextract-3.3.0.tar.gz", hash = "sha256:adcd24abf21ce3450417cd5a00f23b7e57554ce8ae827334dd12bfcbb6274cf1"},
2666
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2667
  tomli = [
2668
  {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
2669
  {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
@@ -2672,10 +2909,35 @@ toolz = [
2672
  {file = "toolz-0.11.2-py3-none-any.whl", hash = "sha256:a5700ce83414c64514d82d60bcda8aabfde092d1c1a8663f9200c07fdcc6da8f"},
2673
  {file = "toolz-0.11.2.tar.gz", hash = "sha256:6b312d5e15138552f1bda8a4e66c30e236c831b612b2bf0005f8a1df10a4bc33"},
2674
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2675
  tqdm = [
2676
  {file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"},
2677
  {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"},
2678
  ]
 
 
 
 
2679
  typer = [
2680
  {file = "typer-0.4.1-py3-none-any.whl", hash = "sha256:e8467f0ebac0c81366c2168d6ad9f888efdfb6d4e1d3d5b4a004f46fa444b5c3"},
2681
  {file = "typer-0.4.1.tar.gz", hash = "sha256:5646aef0d936b2c761a10393f0384ee6b5c7fe0bb3e5cd710b17134ca1d99cff"},
 
60
  sniffio = ">=1.1"
61
 
62
  [package.extras]
 
63
  doc = ["packaging", "sphinx-rtd-theme", "sphinx-autodoc-typehints (>=1.2.0)"]
64
+ test = ["coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "contextlib2", "uvloop (<0.15)", "mock (>=4)", "uvloop (>=0.15)"]
65
  trio = ["trio (>=0.16)"]
66
 
67
  [[package]]
 
92
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
93
 
94
  [package.extras]
95
+ dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"]
96
  docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
97
  tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"]
98
  tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"]
 
99
 
100
  [[package]]
101
  name = "backoff"
 
117
  cffi = ">=1.1"
118
 
119
  [package.extras]
 
120
  tests = ["pytest (>=3.2.1,!=3.3.0)"]
121
+ typecheck = ["mypy"]
122
 
123
  [[package]]
124
  name = "beautifulsoup4"
 
132
  soupsieve = ">1.2"
133
 
134
  [package.extras]
 
135
  html5lib = ["html5lib"]
136
+ lxml = ["lxml"]
137
 
138
  [[package]]
139
  name = "blis"
 
194
 
195
  [[package]]
196
  name = "click"
197
+ version = "8.0.4"
198
  description = "Composable command line interface toolkit"
199
  category = "main"
200
  optional = false
201
+ python-versions = ">=3.6"
202
 
203
  [package.dependencies]
204
  colorama = {version = "*", markers = "platform_system == \"Windows\""}
 
223
  cffi = ">=1.12"
224
 
225
  [package.extras]
226
+ docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"]
227
  docstest = ["pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"]
228
+ pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"]
229
  sdist = ["setuptools_rust (>=0.11.4)"]
230
  ssh = ["bcrypt (>=3.1.5)"]
 
231
  test = ["pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"]
232
 
233
  [[package]]
 
292
  starlette = "0.19.1"
293
 
294
  [package.extras]
 
 
295
  all = ["requests (>=2.24.0,<3.0.0)", "jinja2 (>=2.11.2,<4.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "itsdangerous (>=1.1.0,<3.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)", "orjson (>=3.2.1,<4.0.0)", "email_validator (>=1.1.1,<2.0.0)", "uvicorn[standard] (>=0.12.0,<0.18.0)"]
296
  dev = ["python-jose[cryptography] (>=3.3.0,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "autoflake (>=1.4.0,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)", "uvicorn[standard] (>=0.12.0,<0.18.0)", "pre-commit (>=2.17.0,<3.0.0)"]
297
+ doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "typer (>=0.4.1,<0.5.0)", "pyyaml (>=5.3.1,<7.0.0)"]
298
+ test = ["pytest (>=6.2.4,<7.0.0)", "pytest-cov (>=2.12.0,<4.0.0)", "mypy (==0.910)", "flake8 (>=3.8.3,<4.0.0)", "black (==22.3.0)", "isort (>=5.0.6,<6.0.0)", "requests (>=2.24.0,<3.0.0)", "httpx (>=0.14.0,<0.19.0)", "email_validator (>=1.1.1,<2.0.0)", "sqlalchemy (>=1.3.18,<1.5.0)", "peewee (>=3.13.3,<4.0.0)", "databases[sqlite] (>=0.3.2,<0.6.0)", "orjson (>=3.2.1,<4.0.0)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)", "python-multipart (>=0.0.5,<0.0.6)", "flask (>=1.1.2,<3.0.0)", "anyio[trio] (>=3.2.1,<4.0.0)", "types-ujson (==4.2.1)", "types-orjson (==3.6.2)", "types-dataclasses (==0.6.5)"]
299
 
300
  [[package]]
301
  name = "feedfinder2"
 
350
  python-versions = ">=3.7"
351
 
352
  [package.extras]
 
353
  all = ["fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "zopfli (>=0.1.4)", "lz4 (>=1.7.4.2)", "matplotlib", "sympy", "skia-pathops (>=0.5.0)", "uharfbuzz (>=0.23.0)", "brotlicffi (>=0.8.0)", "scipy", "brotli (>=1.0.1)", "munkres", "unicodedata2 (>=14.0.0)", "xattr"]
354
+ graphite = ["lz4 (>=1.7.4.2)"]
355
  interpolatable = ["scipy", "munkres"]
356
+ lxml = ["lxml (>=4.0,<5)"]
357
  pathops = ["skia-pathops (>=0.5.0)"]
358
+ plot = ["matplotlib"]
 
 
 
359
  repacker = ["uharfbuzz (>=0.23.0)"]
360
+ symfont = ["sympy"]
361
  type1 = ["xattr"]
362
+ ufo = ["fs (>=2.2.0,<3)"]
363
+ unicode = ["unicodedata2 (>=14.0.0)"]
364
+ woff = ["zopfli (>=0.1.4)", "brotlicffi (>=0.8.0)", "brotli (>=1.0.1)"]
365
 
366
  [[package]]
367
  name = "frozenlist"
 
380
  python-versions = ">=3.7"
381
 
382
  [package.extras]
383
+ abfs = ["adlfs"]
384
+ adl = ["adlfs"]
385
+ arrow = ["pyarrow (>=1)"]
386
+ dask = ["dask", "distributed"]
387
+ dropbox = ["dropboxdrivefs", "requests", "dropbox"]
388
  entrypoints = ["importlib-metadata"]
389
+ fuse = ["fusepy"]
390
+ gcs = ["gcsfs"]
391
  git = ["pygit2"]
392
+ github = ["requests"]
393
  gs = ["gcsfs"]
 
394
  gui = ["panel"]
395
+ hdfs = ["pyarrow (>=1)"]
 
 
 
 
 
 
 
 
396
  http = ["requests", "aiohttp"]
 
397
  libarchive = ["libarchive-c"]
398
+ oci = ["ocifs"]
399
+ s3 = ["s3fs"]
400
+ sftp = ["paramiko"]
401
  smb = ["smbprotocol"]
402
+ ssh = ["paramiko"]
403
+ tqdm = ["tqdm"]
404
 
405
  [[package]]
406
  name = "gradio"
 
439
  python-versions = ">=3.6"
440
 
441
  [[package]]
442
+ name = "hu-core-news-trf"
443
+ version = "3.2.2"
444
+ description = "Hungarian transformer pipeline (huBert) for HuSpaCy. Components: transformer, senter, tagger, morphologizer, lemmatizer, parser, ner"
445
  category = "main"
446
  optional = false
447
  python-versions = "*"
448
 
449
  [package.dependencies]
450
+ spacy = ">=3.2.4,<3.3.0"
451
+ spacy-experimental = "0.4.0"
452
+ spacy-transformers = ">=1.1.4,<1.2.0"
453
 
454
  [package.source]
455
  type = "url"
456
+ url = "https://huggingface.co/huspacy/hu_core_news_trf/resolve/main/hu_core_news_trf-any-py3-none-any.whl"
457
+ [[package]]
458
+ name = "huggingface-hub"
459
+ version = "0.8.1"
460
+ description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
461
+ category = "main"
462
+ optional = false
463
+ python-versions = ">=3.7.0"
464
+
465
+ [package.dependencies]
466
+ filelock = "*"
467
+ packaging = ">=20.9"
468
+ pyyaml = ">=5.1"
469
+ requests = "*"
470
+ tqdm = "*"
471
+ typing-extensions = ">=3.7.4.3"
472
+
473
+ [package.extras]
474
+ all = ["pytest", "pytest-cov", "datasets", "soundfile", "black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"]
475
+ dev = ["pytest", "pytest-cov", "datasets", "soundfile", "black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"]
476
+ fastai = ["toml", "fastai (>=2.4)", "fastcore (>=1.3.27)"]
477
+ quality = ["black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)"]
478
+ tensorflow = ["tensorflow", "pydot", "graphviz"]
479
+ testing = ["pytest", "pytest-cov", "datasets", "soundfile"]
480
+ torch = ["torch"]
481
+
482
  [[package]]
483
  name = "idna"
484
  version = "3.3"
 
556
  uc-micro-py = "*"
557
 
558
  [package.extras]
 
 
559
  benchmark = ["pytest", "pytest-benchmark"]
560
  dev = ["pre-commit", "isort", "flake8", "black"]
561
+ doc = ["sphinx", "sphinx-book-theme", "myst-parser"]
562
+ test = ["coverage", "pytest", "pytest-cov"]
563
 
564
  [[package]]
565
  name = "lxml"
 
570
  python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
571
 
572
  [package.extras]
 
573
  cssselect = ["cssselect (>=0.7)"]
574
  html5 = ["html5lib"]
575
  htmlsoup = ["beautifulsoup4"]
576
+ source = ["Cython (>=0.29.7)"]
577
 
578
  [[package]]
579
  name = "markdown-it-py"
 
589
  mdurl = ">=0.1,<1.0"
590
 
591
  [package.extras]
592
+ benchmarking = ["psutil", "pytest", "pytest-benchmark (>=3.2,<4.0)"]
593
+ code_style = ["pre-commit (==2.6)"]
594
  compare = ["commonmark (>=0.9.1,<0.10.0)", "markdown (>=3.3.6,<3.4.0)", "mistletoe (>=0.8.1,<0.9.0)", "mistune (>=2.0.2,<2.1.0)", "panflute (>=2.1.3,<2.2.0)"]
595
+ linkify = ["linkify-it-py (>=1.0,<2.0)"]
596
+ plugins = ["mdit-py-plugins"]
597
  profiling = ["gprof2dot"]
598
+ rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx-book-theme"]
599
  testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
 
 
 
600
 
601
  [[package]]
602
  name = "markupsafe"
 
637
  markdown-it-py = ">=1.0.0,<3.0.0"
638
 
639
  [package.extras]
 
640
  code_style = ["pre-commit (==2.6)"]
641
+ rtd = ["myst-parser (>=0.14.0,<0.15.0)", "sphinx-book-theme (>=0.1.0,<0.2.0)"]
642
  testing = ["coverage", "pytest (>=3.6,<4)", "pytest-cov", "pytest-regressions"]
643
 
644
  [[package]]
 
683
 
684
  [package.extras]
685
  default = ["numpy (>=1.19)", "scipy (>=1.8)", "matplotlib (>=3.4)", "pandas (>=1.3)"]
 
 
686
  developer = ["pre-commit (>=2.19)", "mypy (>=0.960)"]
687
+ doc = ["sphinx (>=5)", "pydata-sphinx-theme (>=0.9)", "sphinx-gallery (>=0.10)", "numpydoc (>=1.4)", "pillow (>=9.1)", "nb2plots (>=0.6)", "texext (>=0.6.6)"]
688
  extra = ["lxml (>=4.6)", "pygraphviz (>=1.9)", "pydot (>=1.4.2)", "sympy (>=1.10)"]
689
+ test = ["pytest (>=7.1)", "pytest-cov (>=3.0)", "codecov (>=2.1)"]
690
 
691
  [[package]]
692
  name = "newspaper3k"
 
726
  tqdm = "*"
727
 
728
  [package.extras]
 
729
  all = ["numpy", "pyparsing", "scipy", "matplotlib", "twython", "requests", "scikit-learn", "python-crfsuite"]
 
 
 
730
  corenlp = ["requests"]
731
+ machine_learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"]
732
+ plot = ["matplotlib"]
733
+ tgrep = ["pyparsing"]
734
+ twitter = ["twython"]
735
 
736
  [[package]]
737
  name = "numpy"
 
795
  six = "*"
796
 
797
  [package.extras]
 
798
  all = ["pyasn1 (>=0.1.7)", "pynacl (>=1.0.1)", "bcrypt (>=3.1.3)", "invoke (>=1.3)", "gssapi (>=1.4.1)", "pywin32 (>=2.1.8)"]
799
+ ed25519 = ["pynacl (>=1.0.1)", "bcrypt (>=3.1.3)"]
800
  gssapi = ["pyasn1 (>=0.1.7)", "gssapi (>=1.4.1)", "pywin32 (>=2.1.8)"]
801
  invoke = ["invoke (>=1.3)"]
802
 
 
813
  typer = ">=0.3.0,<1.0.0"
814
 
815
  [package.extras]
 
 
816
  all = ["google-cloud-storage (>=1.26.0,<2.0.0)", "boto3", "pytest", "pytest-coverage", "mock", "typer-cli"]
817
  gcs = ["google-cloud-storage (>=1.26.0,<2.0.0)"]
818
+ s3 = ["boto3"]
819
+ test = ["pytest", "pytest-coverage", "mock", "typer-cli"]
820
 
821
  [[package]]
822
  name = "phonenumbers"
 
950
  python-versions = ">=3.7"
951
 
952
  [package.extras]
 
953
  doc = ["sphinx", "sphinx-rtd-theme"]
954
+ test = ["pytest", "pytest-cov", "pytest-flake8", "pytest-isort", "coverage"]
955
 
956
  [[package]]
957
  name = "python-dateutil"
 
1014
  urllib3 = ">=1.21.1,<1.27"
1015
 
1016
  [package.extras]
 
1017
  socks = ["PySocks (>=1.5.6,!=1.5.7)"]
1018
+ use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
1019
 
1020
  [[package]]
1021
  name = "requests-file"
 
1044
  threadpoolctl = ">=2.0.0"
1045
 
1046
  [package.extras]
 
 
1047
  benchmark = ["matplotlib (>=3.1.2)", "pandas (>=1.0.5)", "memory-profiler (>=0.57.0)"]
1048
+ docs = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)", "memory-profiler (>=0.57.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "numpydoc (>=1.2.0)", "Pillow (>=7.1.2)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"]
1049
  examples = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "seaborn (>=0.9.0)"]
1050
+ tests = ["matplotlib (>=3.1.2)", "scikit-image (>=0.14.5)", "pandas (>=1.0.5)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "flake8 (>=3.8.2)", "black (>=22.3.0)", "mypy (>=0.770)", "pyamg (>=4.0.0)", "numpydoc (>=1.2.0)"]
1051
 
1052
  [[package]]
1053
  name = "scipy"
 
1102
 
1103
  [package.extras]
1104
  all = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests"]
 
 
 
1105
  azure = ["azure-storage-blob", "azure-common", "azure-core"]
1106
  gcs = ["google-cloud-storage"]
1107
+ http = ["requests"]
1108
+ s3 = ["boto3"]
1109
  test = ["boto3", "google-cloud-storage", "azure-storage-blob", "azure-common", "azure-core", "requests", "moto[server] (==1.3.14)", "pathlib2", "responses", "paramiko", "parameterizedtestcase", "pytest", "pytest-rerunfailures"]
1110
+ webhdfs = ["requests"]
1111
 
1112
  [[package]]
1113
  name = "sniffio"
 
1127
 
1128
  [[package]]
1129
  name = "spacy"
1130
+ version = "3.2.4"
1131
  description = "Industrial-strength Natural Language Processing (NLP) in Python"
1132
  category = "main"
1133
  optional = false
 
1136
  [package.dependencies]
1137
  blis = ">=0.4.0,<0.8.0"
1138
  catalogue = ">=2.0.6,<2.1.0"
1139
+ click = "<8.1.0"
1140
  cymem = ">=2.0.2,<2.1.0"
1141
  jinja2 = "*"
1142
  langcodes = ">=3.2.0,<4.0.0"
 
1147
  preshed = ">=3.0.2,<3.1.0"
1148
  pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<1.9.0"
1149
  requests = ">=2.13.0,<3.0.0"
1150
+ spacy-legacy = ">=3.0.8,<3.1.0"
1151
  spacy-loggers = ">=1.0.0,<2.0.0"
1152
+ srsly = ">=2.4.1,<3.0.0"
1153
+ thinc = ">=8.0.12,<8.1.0"
1154
  tqdm = ">=4.38.0,<5.0.0"
1155
  typer = ">=0.3.0,<0.5.0"
1156
+ wasabi = ">=0.8.1,<1.1.0"
1157
 
1158
  [package.extras]
 
 
1159
  apple = ["thinc-apple-ops (>=0.0.4,<1.0.0)"]
1160
+ cuda = ["cupy (>=5.0.0b4,<11.0.0)"]
1161
+ cuda100 = ["cupy-cuda100 (>=5.0.0b4,<11.0.0)"]
1162
+ cuda101 = ["cupy-cuda101 (>=5.0.0b4,<11.0.0)"]
 
 
1163
  cuda102 = ["cupy-cuda102 (>=5.0.0b4,<11.0.0)"]
 
 
 
1164
  cuda110 = ["cupy-cuda110 (>=5.0.0b4,<11.0.0)"]
1165
  cuda111 = ["cupy-cuda111 (>=5.0.0b4,<11.0.0)"]
1166
+ cuda112 = ["cupy-cuda112 (>=5.0.0b4,<11.0.0)"]
1167
+ cuda113 = ["cupy-cuda113 (>=5.0.0b4,<11.0.0)"]
 
 
 
1168
  cuda114 = ["cupy-cuda114 (>=5.0.0b4,<11.0.0)"]
1169
+ cuda115 = ["cupy-cuda115 (>=5.0.0b4,<11.0.0)"]
1170
+ cuda80 = ["cupy-cuda80 (>=5.0.0b4,<11.0.0)"]
1171
+ cuda90 = ["cupy-cuda90 (>=5.0.0b4,<11.0.0)"]
1172
+ cuda91 = ["cupy-cuda91 (>=5.0.0b4,<11.0.0)"]
1173
+ cuda92 = ["cupy-cuda92 (>=5.0.0b4,<11.0.0)"]
1174
+ ja = ["sudachipy (>=0.5.2,!=0.6.1)", "sudachidict-core (>=20211220)"]
1175
+ ko = ["natto-py (==0.9.0)"]
1176
+ lookups = ["spacy-lookups-data (>=1.0.3,<1.1.0)"]
1177
  ray = ["spacy-ray (>=0.1.0,<1.0.0)"]
1178
+ th = ["pythainlp (>=2.0)"]
1179
+ transformers = ["spacy-transformers (>=1.1.2,<1.2.0)"]
1180
+
1181
+ [[package]]
1182
+ name = "spacy-alignments"
1183
+ version = "0.8.5"
1184
+ description = "A spaCy package for the Rust tokenizations library"
1185
+ category = "main"
1186
+ optional = false
1187
+ python-versions = ">=3.6"
1188
+
1189
+ [[package]]
1190
+ name = "spacy-experimental"
1191
+ version = "0.4.0"
1192
+ description = "Cutting-edge experimental spaCy components and features"
1193
+ category = "main"
1194
+ optional = false
1195
+ python-versions = ">=3.6"
1196
+
1197
+ [package.dependencies]
1198
+ spacy = ">=3.2.0,<3.3.0"
1199
+
1200
+ [package.extras]
1201
+ torch = ["torch (>=1.5.0)"]
1202
 
1203
  [[package]]
1204
  name = "spacy-legacy"
 
1219
  [package.dependencies]
1220
  wasabi = ">=0.8.1,<1.1.0"
1221
 
1222
+ [[package]]
1223
+ name = "spacy-transformers"
1224
+ version = "1.1.6"
1225
+ description = "spaCy pipelines for pre-trained BERT and other transformers"
1226
+ category = "main"
1227
+ optional = false
1228
+ python-versions = ">=3.6"
1229
+
1230
+ [package.dependencies]
1231
+ spacy = ">=3.1.3,<4.0.0"
1232
+ spacy-alignments = ">=0.7.2,<1.0.0"
1233
+ srsly = ">=2.4.0,<3.0.0"
1234
+ torch = ">=1.6.0"
1235
+ transformers = ">=3.4.0,<4.20.0"
1236
+
1237
+ [package.extras]
1238
+ cuda = ["cupy (>=5.0.0b4)"]
1239
+ cuda100 = ["cupy-cuda100 (>=5.0.0b4)"]
1240
+ cuda101 = ["cupy-cuda101 (>=5.0.0b4)"]
1241
+ cuda102 = ["cupy-cuda102 (>=5.0.0b4)"]
1242
+ cuda110 = ["cupy-cuda110 (>=5.0.0b4)"]
1243
+ cuda111 = ["cupy-cuda111 (>=5.0.0b4)"]
1244
+ cuda112 = ["cupy-cuda112 (>=5.0.0b4)"]
1245
+ cuda80 = ["cupy-cuda80 (>=5.0.0b4)"]
1246
+ cuda90 = ["cupy-cuda90 (>=5.0.0b4)"]
1247
+ cuda91 = ["cupy-cuda91 (>=5.0.0b4)"]
1248
+ cuda92 = ["cupy-cuda92 (>=5.0.0b4)"]
1249
+
1250
  [[package]]
1251
  name = "srsly"
1252
  version = "2.4.3"
 
1297
  tqdm = ">=4.19.6"
1298
 
1299
  [package.extras]
 
 
1300
  build_and_test = ["build", "pytest (>=6.0,<7.0)", "pytest-cov", "twine (>=3.0.0)", "wheel"]
1301
  dev = ["black", "build", "flake8 (>=3.8.0)", "mypy (>=0.900)", "recommonmark (>=0.6.0,<0.7.0)", "sphinx (>=3.0.0,<4.0.0)", "pytest (>=6.0,<7.0)", "pytest-cov", "twine (>=3.0.0)", "wheel"]
1302
+ docs = ["recommonmark (>=0.6.0,<0.7.0)", "sphinx (>=3.0.0,<4.0.0)"]
1303
  lint_and_format = ["black", "flake8 (>=3.8.0)", "mypy (>=0.900)"]
1304
+ viz = ["matplotlib (>=3.0.0)"]
1305
 
1306
  [[package]]
1307
  name = "thinc"
 
1323
  wasabi = ">=0.8.1,<1.1.0"
1324
 
1325
  [package.extras]
1326
+ cuda = ["cupy (>=5.0.0b4)"]
1327
+ cuda100 = ["cupy-cuda100 (>=5.0.0b4)"]
1328
+ cuda101 = ["cupy-cuda101 (>=5.0.0b4)"]
1329
+ cuda102 = ["cupy-cuda102 (>=5.0.0b4)"]
1330
+ cuda110 = ["cupy-cuda110 (>=5.0.0b4)"]
1331
+ cuda111 = ["cupy-cuda111 (>=5.0.0b4)"]
1332
  cuda112 = ["cupy-cuda112 (>=5.0.0b4)"]
1333
  cuda113 = ["cupy-cuda113 (>=5.0.0b4)"]
 
 
 
 
1334
  cuda114 = ["cupy-cuda114 (>=5.0.0b4)"]
1335
+ cuda115 = ["cupy-cuda115 (>=5.0.0b4)"]
1336
  cuda80 = ["cupy-cuda80 (>=5.0.0b4)"]
1337
+ cuda90 = ["cupy-cuda90 (>=5.0.0b4)"]
1338
+ cuda91 = ["cupy-cuda91 (>=5.0.0b4)"]
1339
+ cuda92 = ["cupy-cuda92 (>=5.0.0b4)"]
1340
+ datasets = ["ml-datasets (>=0.2.0,<0.3.0)"]
1341
+ mxnet = ["mxnet (>=1.5.1,<1.6.0)"]
1342
  tensorflow = ["tensorflow (>=2.0.0,<2.6.0)"]
1343
+ torch = ["torch (>=1.6.0)"]
 
 
1344
 
1345
  [[package]]
1346
  name = "threadpoolctl"
 
1372
  requests = ">=2.1.0"
1373
  requests-file = ">=1.4"
1374
 
1375
+ [[package]]
1376
+ name = "tokenizers"
1377
+ version = "0.12.1"
1378
+ description = "Fast and Customizable Tokenizers"
1379
+ category = "main"
1380
+ optional = false
1381
+ python-versions = "*"
1382
+
1383
+ [package.extras]
1384
+ docs = ["sphinx", "sphinx-rtd-theme", "setuptools-rust"]
1385
+ testing = ["pytest", "requests", "numpy", "datasets"]
1386
+
1387
  [[package]]
1388
  name = "tomli"
1389
  version = "2.0.1"
 
1400
  optional = false
1401
  python-versions = ">=3.5"
1402
 
1403
+ [[package]]
1404
+ name = "torch"
1405
+ version = "1.11.0"
1406
+ description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
1407
+ category = "main"
1408
+ optional = false
1409
+ python-versions = ">=3.7.0"
1410
+
1411
+ [package.dependencies]
1412
+ typing-extensions = "*"
1413
+
1414
  [[package]]
1415
  name = "tqdm"
1416
  version = "4.64.0"
 
1423
  colorama = {version = "*", markers = "platform_system == \"Windows\""}
1424
 
1425
  [package.extras]
1426
+ dev = ["py-make (>=0.1.0)", "twine", "wheel"]
1427
  notebook = ["ipywidgets (>=6)"]
1428
  slack = ["slack-sdk"]
1429
+ telegram = ["requests"]
1430
+
1431
+ [[package]]
1432
+ name = "transformers"
1433
+ version = "4.19.4"
1434
+ description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
1435
+ category = "main"
1436
+ optional = false
1437
+ python-versions = ">=3.7.0"
1438
+
1439
+ [package.dependencies]
1440
+ filelock = "*"
1441
+ huggingface-hub = ">=0.1.0,<1.0"
1442
+ numpy = ">=1.17"
1443
+ packaging = ">=20.0"
1444
+ pyyaml = ">=5.1"
1445
+ regex = "!=2019.12.17"
1446
+ requests = "*"
1447
+ tokenizers = ">=0.11.1,<0.11.3 || >0.11.3,<0.13"
1448
+ tqdm = ">=4.27"
1449
+
1450
+ [package.extras]
1451
+ all = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf (<=3.20.1)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)"]
1452
+ audio = ["librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
1453
+ codecarbon = ["codecarbon (==1.2.0)"]
1454
+ deepspeed = ["deepspeed (>=0.6.4)"]
1455
+ deepspeed-testing = ["deepspeed (>=0.6.4)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "protobuf (<=3.20.1)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "optuna"]
1456
+ dev = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf (<=3.20.1)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "hf-doc-builder", "scikit-learn"]
1457
+ dev-tensorflow = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "protobuf (<=3.20.1)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "pillow", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "hf-doc-builder", "scikit-learn", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
1458
+ dev-torch = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "protobuf (<=3.20.1)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)", "torch (>=1.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)", "hf-doc-builder", "scikit-learn", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
1459
+ docs = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx", "torch (>=1.0)", "jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)", "sentencepiece (>=0.1.91,!=0.1.92)", "protobuf (<=3.20.1)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer", "pillow", "optuna", "ray", "sigopt", "timm", "codecarbon (==1.2.0)", "hf-doc-builder"]
1460
+ docs_specific = ["hf-doc-builder"]
1461
+ fairscale = ["fairscale (>0.3)"]
1462
+ flax = ["jax (>=0.2.8,!=0.3.2,<=0.3.6)", "jaxlib (>=0.1.65,<=0.3.6)", "flax (>=0.3.5)", "optax (>=0.0.8)"]
1463
+ flax-speech = ["librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
1464
+ ftfy = ["ftfy"]
1465
+ integrations = ["optuna", "ray", "sigopt"]
1466
+ ja = ["fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "unidic-lite (>=1.0.7)", "unidic (>=1.0.2)"]
1467
+ modelcreation = ["cookiecutter (==1.7.3)"]
1468
+ onnx = ["onnxconverter-common", "tf2onnx", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
1469
+ onnxruntime = ["onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)"]
1470
+ optuna = ["optuna"]
1471
+ quality = ["black (>=22.0,<23.0)", "isort (>=5.5.4)", "flake8 (>=3.8.3)", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)"]
1472
+ ray = ["ray"]
1473
+ retrieval = ["faiss-cpu", "datasets"]
1474
+ sagemaker = ["sagemaker (>=2.31.0)"]
1475
+ sentencepiece = ["sentencepiece (>=0.1.91,!=0.1.92)", "protobuf (<=3.20.1)"]
1476
+ serving = ["pydantic", "uvicorn", "fastapi", "starlette"]
1477
+ sigopt = ["sigopt"]
1478
+ sklearn = ["scikit-learn"]
1479
+ speech = ["torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
1480
+ testing = ["pytest", "pytest-xdist", "timeout-decorator", "parameterized", "psutil", "datasets", "pytest-timeout", "black (>=22.0,<23.0)", "sacrebleu (>=1.4.12,<2.0.0)", "rouge-score", "nltk", "GitPython (<3.1.19)", "hf-doc-builder (>=0.3.0)", "protobuf (<=3.20.1)", "sacremoses", "rjieba", "faiss-cpu", "cookiecutter (==1.7.3)"]
1481
+ tf = ["tensorflow (>=2.3)", "onnxconverter-common", "tf2onnx"]
1482
+ tf-cpu = ["tensorflow-cpu (>=2.3)", "onnxconverter-common", "tf2onnx"]
1483
+ tf-speech = ["librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
1484
+ timm = ["timm"]
1485
+ tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.13)"]
1486
+ torch = ["torch (>=1.0)"]
1487
+ torch-speech = ["torchaudio", "librosa", "pyctcdecode (>=0.3.0)", "phonemizer"]
1488
+ torchhub = ["filelock", "huggingface-hub (>=0.1.0,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf (<=3.20.1)", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.0)", "tokenizers (>=0.11.1,!=0.11.3,<0.13)", "tqdm (>=4.27)"]
1489
+ vision = ["pillow"]
1490
 
1491
  [[package]]
1492
  name = "typer"
 
1500
  click = ">=7.1.1,<9.0.0"
1501
 
1502
  [package.extras]
 
 
1503
  all = ["colorama (>=0.4.3,<0.5.0)", "shellingham (>=1.3.0,<2.0.0)"]
1504
  dev = ["autoflake (>=1.3.1,<2.0.0)", "flake8 (>=3.8.3,<4.0.0)"]
1505
+ doc = ["mkdocs (>=1.1.2,<2.0.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "mdx-include (>=1.4.1,<2.0.0)"]
1506
+ test = ["shellingham (>=1.3.0,<2.0.0)", "pytest (>=4.4.0,<5.4.0)", "pytest-cov (>=2.10.0,<3.0.0)", "coverage (>=5.2,<6.0)", "pytest-xdist (>=1.32.0,<2.0.0)", "pytest-sugar (>=0.9.4,<0.10.0)", "mypy (==0.910)", "black (>=22.3.0,<23.0.0)", "isort (>=5.0.6,<6.0.0)"]
1507
 
1508
  [[package]]
1509
  name = "typing-extensions"
 
1576
  [metadata]
1577
  lock-version = "1.1"
1578
  python-versions = "~3.8"
1579
+ content-hash = "6eddc3d82625c70609c7d662f96be93f68188162ab16c1b708812a1ce1bcc64f"
1580
 
1581
  [metadata.files]
1582
  aiohttp = [
 
1788
  {file = "charset_normalizer-2.0.12-py3-none-any.whl", hash = "sha256:6881edbebdb17b39b4eaaa821b438bf6eddffb4468cf344f09f89def34a8b1df"},
1789
  ]
1790
  click = [
1791
+ {file = "click-8.0.4-py3-none-any.whl", hash = "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1"},
1792
+ {file = "click-8.0.4.tar.gz", hash = "sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb"},
1793
  ]
1794
  colorama = [
1795
  {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"},
 
1829
  ]
1830
  cymem = [
1831
  {file = "cymem-2.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:700540b68e96a7056d0691d467df2bbaaf0934a3e6fe2383669998cbee19580a"},
 
1832
  {file = "cymem-2.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:971cf0a8437dfb4185c3049c086e463612fe849efadc0f5cc153fc81c501da7d"},
1833
  {file = "cymem-2.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:6b0d1a6b0a1296f31fa9e4b7ae5ea49394084ecc883b1ae6fec4844403c43468"},
1834
  {file = "cymem-2.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b8e1c18bb00800425576710468299153caad20c64ddb6819d40a6a34e21ee21c"},
 
1838
  {file = "cymem-2.0.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd52d8a81881804625df88453611175ab7e0099b34f52204da1f6940cf2e83c9"},
1839
  {file = "cymem-2.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:4749f220e4c06ec44eb10de13794ff0508cdc4f8eff656cf49cab2cdb3122c0c"},
1840
  {file = "cymem-2.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2aa3fa467d906cd2c27fa0a2e2952dd7925f5fcc7973fab6d815ef6acb25aad8"},
 
1841
  {file = "cymem-2.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea535f74ab6024e7416f93de564e5c81fb7c0964b96280de66f60aeb05f0cf53"},
1842
  {file = "cymem-2.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:4f87fe087f2ae36c3e20e2b1a29d7f76a28c035372d0a97655f26223d975235a"},
1843
  {file = "cymem-2.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a93fba62fe79dbf6fc4d5b6d804a6e114b44af3ff3d40a28833ee39f21bd336b"},
 
1844
  {file = "cymem-2.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04676d696596b0db3f3c5a3936bab12fb6f24278921a6622bb185e61765b2b4d"},
1845
  {file = "cymem-2.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:c59293b232b53ebb47427f16cf648e937022f489cff36c11d1d8a1f0075b6609"},
1846
  {file = "cymem-2.0.6.tar.gz", hash = "sha256:169725b5816959d34de2545b33fee6a8021a6e08818794a426c5a4f981f17e5e"},
 
1947
  {file = "h11-0.13.0-py3-none-any.whl", hash = "sha256:8ddd78563b633ca55346c8cd41ec0af27d3c79931828beffb46ce70a379e7442"},
1948
  {file = "h11-0.13.0.tar.gz", hash = "sha256:70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06"},
1949
  ]
1950
+ hu-core-news-trf = []
1951
+ huggingface-hub = [
1952
+ {file = "huggingface_hub-0.8.1-py3-none-any.whl", hash = "sha256:a11fb8d696a26f927833d46b7633105fd864fd92a2beb1140cbf1b2f703dedb3"},
1953
+ {file = "huggingface_hub-0.8.1.tar.gz", hash = "sha256:75c70797da54b849f06c2cbf7ba2217250ee217230b9f65547d5db3c5bd84bb5"},
1954
+ ]
1955
  idna = [
1956
  {file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
1957
  {file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
 
2429
  ]
2430
  preshed = [
2431
  {file = "preshed-3.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:66a71ced487516cf81fd0431a3a843514262ae2f33e9a7688b87562258fa75d5"},
 
2432
  {file = "preshed-3.0.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c98f725d8478f3ade4ab1ea00f50a92d2d9406d37276bc46fd8bab1d47452c4"},
2433
  {file = "preshed-3.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:ea8aa9610837e907e8442e79300df0a861bfdb4dcaf026a5d9642a688ad04815"},
2434
  {file = "preshed-3.0.6-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e03ae3eee961106a517fcd827b5a7c51f7317236b3e665c989054ab8dc381d28"},
 
2438
  {file = "preshed-3.0.6-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61b2ea656cb1c38d544cc774f1c2ad1cdab23167b46b35310a7e211d4ba9c6d0"},
2439
  {file = "preshed-3.0.6-cp37-cp37m-win_amd64.whl", hash = "sha256:87e1add41b7f6236a3ccc34788f47ab8682bc28e8a2d369089062e274494c1a0"},
2440
  {file = "preshed-3.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a279c138ad1d5be02547b1545254929588414b01571fe637016367f6a1aa11de"},
 
2441
  {file = "preshed-3.0.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3af09f4cfcdaca085fd87dac8107617c4e2bb0ad1458f953841b71e9728287f5"},
2442
  {file = "preshed-3.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:f92e752a868ea2690e1b38c4b775251a145e0fce36b9bdd972539e8271b7a23a"},
2443
  {file = "preshed-3.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eaffbc71fdb8625f9aac4fe7e19e20bf318d1421ea05903bebe3e6ffef27b587"},
 
2444
  {file = "preshed-3.0.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cfe1495fcfc7f479de840ddc4f426dbb55351e218ae5c8712c1269183a4d0060"},
2445
  {file = "preshed-3.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:92a8f49d17a63537a8beed48a049b62ef168ca07e0042a5b2bcdf178a1fb5d48"},
2446
  {file = "preshed-3.0.6.tar.gz", hash = "sha256:fb3b7588a3a0f2f2f1bf3fe403361b2b031212b73a37025aea1df7215af3772a"},
 
2734
  {file = "soupsieve-2.3.2.post1.tar.gz", hash = "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"},
2735
  ]
2736
  spacy = [
2737
+ {file = "spacy-3.2.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e20c63ba47eaa33ebd4b2cc6eefa3e8906505273799138ad8ab231b146d8875"},
2738
+ {file = "spacy-3.2.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9184973c9052e1bb9eeb975801e6906aacbe0c009533ec0c34f443832473fd"},
2739
+ {file = "spacy-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:0168d97e7fbbddd3258016e4d3c10d1593b7129dddff146c14f3b103ade6b1cd"},
2740
+ {file = "spacy-3.2.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cff47cdaa824802cd38ae94fe98af9cde6810d86334cd283659c868e0011831a"},
2741
+ {file = "spacy-3.2.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:400af3490c36c1b6c895de526ec06f6c7655af5ca595743c07e09e9bc8f378ea"},
2742
+ {file = "spacy-3.2.4-cp36-cp36m-win_amd64.whl", hash = "sha256:87bd072ccacedbf8bc5a692fea1d5c320abd26821c63af157a7c95baa47dc36d"},
2743
+ {file = "spacy-3.2.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:36e9ef5a32834383d37bbd27fca49388e31e9b53f77c91ba8ccbf19af10e3aef"},
2744
+ {file = "spacy-3.2.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e9a98999b0fce03d4f483112837ac7111378449ace069c7cd050908f0fa5d9f"},
2745
+ {file = "spacy-3.2.4-cp37-cp37m-win_amd64.whl", hash = "sha256:89be328ff378e4cdcfb4dcf38ca2fad740f87213825ed10e8ce9f54b822277b8"},
2746
+ {file = "spacy-3.2.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ed29278fc89f07c1999ceca5f6702b379589c8e884a57816bdaeb05a1a7b2bbb"},
2747
+ {file = "spacy-3.2.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:090e684eec551b5b7d56d9242cea18742515a706191ad158e32e16e8f2fe15ac"},
2748
+ {file = "spacy-3.2.4-cp38-cp38-win_amd64.whl", hash = "sha256:2053cb78bcf4eec38aa266890a5700167a284d1a26197f851710d29f3d7071b3"},
2749
+ {file = "spacy-3.2.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6db861f69f18ba5e00d0bd44744cf1662e00cc3b564d17a1ccdc4625ec3d5c3d"},
2750
+ {file = "spacy-3.2.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac2288e87de1066ad65676e930f53978d6ee97c34044dca4d24f64a24e2a88b6"},
2751
+ {file = "spacy-3.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:e759e27da39e469b6367b82281a10eb4e50de04260ba49d42091cbdfe2d99633"},
2752
+ {file = "spacy-3.2.4.tar.gz", hash = "sha256:3e4c6f298d54044582daca1142b082ee38831bb3d7bb931d2ee601e8b8dce64f"},
2753
+ ]
2754
+ spacy-alignments = [
2755
+ {file = "spacy-alignments-0.8.5.tar.gz", hash = "sha256:94ecb48f884ab8fa479d9929997281ebe0dc5b98fb8b5e53ebc252e88b023e21"},
2756
+ {file = "spacy_alignments-0.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:96d0df4bbd88a840ac136cb9b191e64fb21faa893afc6dd4dd7061242095f254"},
2757
+ {file = "spacy_alignments-0.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4a0793ddd98ef840ac60be3583a096e0168c99f4604b01d3060ecd578f9920d2"},
2758
+ {file = "spacy_alignments-0.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:095ac0b876ca823b1a44a4779c0de47d73d643601b934c4d996d79329440f40b"},
2759
+ {file = "spacy_alignments-0.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:d33e3f02753e0febb469f7700750eb726bb008025efb6e553b6f86c02bc98e67"},
2760
+ {file = "spacy_alignments-0.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4f4ac27b1f2a5a4a5f3c43ee171fadf10a24220a5921846cd6b92514bd7f46e2"},
2761
+ {file = "spacy_alignments-0.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7eb900cdcbb4ce83f81e7b36fd6b4b05ce305b693f17e5b385ebfb9e859f473"},
2762
+ {file = "spacy_alignments-0.8.5-cp36-cp36m-win_amd64.whl", hash = "sha256:4591d02a8fbf697fe3142d2b8278fcf28f7fb19c1a527a4876b281d7525ccea0"},
2763
+ {file = "spacy_alignments-0.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6bf4c27615ddbb50d7920e870157400acdae15a0d5bd43694432dd668798b018"},
2764
+ {file = "spacy_alignments-0.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a15cc4ec3251feaddf5ccd404e1691717594b05e8cd4957baca46f1ce6c3723f"},
2765
+ {file = "spacy_alignments-0.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:60a659fc8fdac78f28def69155ccabdf63ff7578e15d79b9a20821f96c9c4744"},
2766
+ {file = "spacy_alignments-0.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:57a142679f609994fe5609aea1e0d779857277f116f71f34891c879d7cec8166"},
2767
+ {file = "spacy_alignments-0.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:efa040247a82f46dd285e8144b86d1b06e757543d757a5a98157516a36526df2"},
2768
+ {file = "spacy_alignments-0.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64c6bde918394ed772a9c10997fe6e4c28dd8d093f51c94eeecaeed3c9fb3230"},
2769
+ {file = "spacy_alignments-0.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:9288d6ab901d183a9f4c5037a5a20477936d62d8222bd026a15c044bddd20b6f"},
2770
+ {file = "spacy_alignments-0.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:24642819c1068215c07d1de036a22b00d136050bb6f9de4f898cfb18f4e4ef0f"},
2771
+ {file = "spacy_alignments-0.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:46956d142b1972dd992577aca5c1d32b49b6621768ec8f272e73b8c0dba805fd"},
2772
+ {file = "spacy_alignments-0.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2c209d8aa4b0aa07ba4b98f51de679f2e415a1e069cc4caf1efb7bd0f6059ac"},
2773
+ {file = "spacy_alignments-0.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:176c0cc59da7789daf7a313c0bcbc61d1dde5539068a24c8aba728917e481a2d"},
2774
+ ]
2775
+ spacy-experimental = [
2776
+ {file = "spacy-experimental-0.4.0.tar.gz", hash = "sha256:0739e8350c103076e221bcaec019d5a55dbb1f6d26125d2e59b1ca91743b3799"},
2777
+ {file = "spacy_experimental-0.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0f3dd83241ea86ddcd2ea99f5d346460570e10349c7fc6d7446d84c709ff9f1a"},
2778
+ {file = "spacy_experimental-0.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:df5a969adf3de3befe3e7b916ac2adc5f11f0a1af9ad07349382f2496da89b07"},
2779
+ {file = "spacy_experimental-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35d63e2aba04c047c9a1fa5274221ef86608885eaae3745f65ab75ead1e6b6e8"},
2780
+ {file = "spacy_experimental-0.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:45b0dafe2e68cf5443a4b443868daf64fa14f537ee290c22988a0ad9d560697b"},
2781
+ {file = "spacy_experimental-0.4.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:16da4a4a16d8abc41c6b9eb505903a8714c9de755b9125df75d4291eda322395"},
2782
+ {file = "spacy_experimental-0.4.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0eb41150d4893afee25e6b1796fd7b760a632ff8eb219cb82ae5a2e7d3478d1c"},
2783
+ {file = "spacy_experimental-0.4.0-cp36-cp36m-win_amd64.whl", hash = "sha256:b8c820a6e1671cd33a46099e97193f39e7f551bdb1e13af7edde6ff07832a759"},
2784
+ {file = "spacy_experimental-0.4.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f8c667a21a54af11a00346a8e89f6c1ba45a0af25fb1f3967c27ac9dcaf7a2a0"},
2785
+ {file = "spacy_experimental-0.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99a163765edbfccca36f60b1e1ac101f1bb54991d0d66eedc364422bf25f55fe"},
2786
+ {file = "spacy_experimental-0.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:347d8901d3a1c592f7b4365cbfa10c5198ab0764b0acafaaf17577d1b4f3c366"},
2787
+ {file = "spacy_experimental-0.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:226e5ae003130b22d3df9699f4ec891d897bf94b42d11f14cc30cdc57e70c795"},
2788
+ {file = "spacy_experimental-0.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:150279a0a2da42ce2cc87e16cc70deadcdde66fe60c4b3a49609c6f052b4e58e"},
2789
+ {file = "spacy_experimental-0.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31ae583f41cb316e6bc120e401de192e01ab84a9d10a1443e7c8185bfd7ce9a7"},
2790
+ {file = "spacy_experimental-0.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:95438909caaeb66beab25643f559b64447f15fe1994d3739ba5c6c64c9daa739"},
2791
+ {file = "spacy_experimental-0.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:90ffbbda69330a5e7b195dbafa44a50ce9beff7d4d60026b62f1305a3f34d334"},
2792
+ {file = "spacy_experimental-0.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:523bf9a0ca419c7232bc0bd70b85f4bff07c1de5b52817faa618a839a05e70e7"},
2793
+ {file = "spacy_experimental-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2068d052082d157b7de0fe05282107aed4203198357648bd9619581f2b0fba0a"},
2794
+ {file = "spacy_experimental-0.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb68325f9b656e543b010d4270ad5e793d31f5c8ec135e62c5baa77297be5f19"},
2795
  ]
2796
  spacy-legacy = [
2797
  {file = "spacy-legacy-3.0.9.tar.gz", hash = "sha256:4f7dcbc4e6c8e8cb4eadbb009f9c0a1a2a67442e0032c8d6776c9470c3759903"},
 
2801
  {file = "spacy-loggers-1.0.2.tar.gz", hash = "sha256:e75d44f4cf99e6763d7132ca7c8c420e0a92790222a08bc8eb9e24ea2c13536e"},
2802
  {file = "spacy_loggers-1.0.2-py3-none-any.whl", hash = "sha256:d48c9313a577ad1818da961cf6db71a73fd1e556ae47e6e68d7e28b541d11e18"},
2803
  ]
2804
+ spacy-transformers = [
2805
+ {file = "spacy-transformers-1.1.6.tar.gz", hash = "sha256:7a05a172b7d1f01ea5ee38b428ecee333d7c6197a935bfd9433e52d11128f477"},
2806
+ {file = "spacy_transformers-1.1.6-py2.py3-none-any.whl", hash = "sha256:370a51d6b2eeae9f6fe0b90c76fb6602579173659acd7f68c1a6dd6b2655f5dc"},
2807
+ ]
2808
  srsly = [
2809
  {file = "srsly-2.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d0236feafe3805b384532221596e6749a54d0ff10ba022b333dc1de7aa1b2f7"},
2810
  {file = "srsly-2.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f96af9fde9f58d5923091fa723fa0fed58a83781b98e143a5d1fac5e738b9f0d"},
 
2866
  {file = "tldextract-3.3.0-py3-none-any.whl", hash = "sha256:5d88321b1b528ebb8f678c72ab023f37caf6381f6af9576b4e60fd266cff178c"},
2867
  {file = "tldextract-3.3.0.tar.gz", hash = "sha256:adcd24abf21ce3450417cd5a00f23b7e57554ce8ae827334dd12bfcbb6274cf1"},
2868
  ]
2869
+ tokenizers = [
2870
+ {file = "tokenizers-0.12.1-cp310-cp310-macosx_10_11_x86_64.whl", hash = "sha256:d737df0f8f26e093a82bfb106b6cfb510a0e9302d35834568e5b20b73ddc5a9c"},
2871
+ {file = "tokenizers-0.12.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f1271224acafb27639c432e1ce4e7d38eab40305ba1c546e871d5c8a32f4f195"},
2872
+ {file = "tokenizers-0.12.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdeba37c2fb44e1aec8a72af4cb369655b59ba313181b1b4b8183f08e759c49c"},
2873
+ {file = "tokenizers-0.12.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53b5f4012ce3ffddd5b00827441b80dc7a0f6b41f4fc5248ae6d36e7d3920c6d"},
2874
+ {file = "tokenizers-0.12.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5188e13fc09edfe05712ca3ae5a44e7f2b0137927b1ca210d0fad90d3e58315a"},
2875
+ {file = "tokenizers-0.12.1-cp310-cp310-win32.whl", hash = "sha256:eff5ff411f18a201eec137b7b32fcb55e0c48b372d370bd24f965f5bad471fa4"},
2876
+ {file = "tokenizers-0.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:bdbca79726fe883c696088ea163715b2f902aec638a8e24bcf9790ff8fa45019"},
2877
+ {file = "tokenizers-0.12.1-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:28825dade9e52ad464164020758f9d49eb7251c32b6ae146601c506a23c67c0e"},
2878
+ {file = "tokenizers-0.12.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91906d725cb84d8ee71ce05fbb155d39d494849622b4f9349e5176a8eb01c49b"},
2879
+ {file = "tokenizers-0.12.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:230f51a0a82ca7b90077eaca2415f12ff9bd144607888b9c50c2ee543452322e"},
2880
+ {file = "tokenizers-0.12.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d4339c376b695de2ad8ccaebffa75e4dc1d7857be1103d80e7925b34af8cf78"},
2881
+ {file = "tokenizers-0.12.1-cp37-cp37m-macosx_10_11_x86_64.whl", hash = "sha256:27d93b712aa2d4346aa506ecd4ec9e94edeebeaf2d484357b482cdeffc02b5f5"},
2882
+ {file = "tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7f4cb68dc538b52240d1986d2034eb0a6373be2ab5f0787d1be3ad1444ce71b7"},
2883
+ {file = "tokenizers-0.12.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae6c04b629ac2cd2f695739988cb70b9bd8d5e7f849f5b14c4510e942bee5770"},
2884
+ {file = "tokenizers-0.12.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6a38b2019d4807d42afeff603a119094ee00f63bea2921136524c8814e9003f8"},
2885
+ {file = "tokenizers-0.12.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fde8dccb9033fa344ffce3ee1837939a50e7a210a768f1cf2059beeafa755481"},
2886
+ {file = "tokenizers-0.12.1-cp37-cp37m-win32.whl", hash = "sha256:38625595b2fd37bfcce64ff9bfb6868c07e9a7b7f205c909d94a615ce9472287"},
2887
+ {file = "tokenizers-0.12.1-cp37-cp37m-win_amd64.whl", hash = "sha256:01abe6fbfe55e4131ca0c4c3d1a9d7ef5df424a8d536e998d2a4fc0bc57935f4"},
2888
+ {file = "tokenizers-0.12.1-cp38-cp38-macosx_10_11_x86_64.whl", hash = "sha256:7c5c54080a7d5c89c990e0d478e0882dbac88926d43323a3aa236492a3c9455f"},
2889
+ {file = "tokenizers-0.12.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:419d113e3bcc4fe20a313afc47af81e62906306b08fe1601e1443d747d46af1f"},
2890
+ {file = "tokenizers-0.12.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9779944559cb7ace6a8516e402895f239b0d9d3c833c67dbaec496310e7e206"},
2891
+ {file = "tokenizers-0.12.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7d43de14b4469b57490dbaf136a31c266cb676fa22320f01f230af9219ae9034"},
2892
+ {file = "tokenizers-0.12.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:258873634406bd1d438c799993a5e44bbc0132ff055985c03c4fe30f702e9a33"},
2893
+ {file = "tokenizers-0.12.1-cp38-cp38-win32.whl", hash = "sha256:3f2647cc256d6a53d18b9dcd71d377828e9f8991fbcbd6fcd8ca2ceb174552b0"},
2894
+ {file = "tokenizers-0.12.1-cp38-cp38-win_amd64.whl", hash = "sha256:62a723bd4b18bc55121f5c34cd8efd6c651f2d3b81f81dd50e5351fb65b8a617"},
2895
+ {file = "tokenizers-0.12.1-cp39-cp39-macosx_10_11_x86_64.whl", hash = "sha256:411ebc89228f30218ffa9d9c49d414864b0df5026a47c24820431821c4360460"},
2896
+ {file = "tokenizers-0.12.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:619728df2551bdfe6f96ff177f9ded958e7ed9e2af94c8d5ac2834d1eb06d112"},
2897
+ {file = "tokenizers-0.12.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cea98f3f9577d1541b7bb0f7a3308a911751067e1d83e01485c9d3411bbf087"},
2898
+ {file = "tokenizers-0.12.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:664f36f0a0d409c24f2201d495161fec4d8bc93e091fbb78814eb426f29905a3"},
2899
+ {file = "tokenizers-0.12.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0bf2380ad59c50222959a9b6f231339200a826fc5cb2be09ff96d8a59f65fc5e"},
2900
+ {file = "tokenizers-0.12.1-cp39-cp39-win32.whl", hash = "sha256:6a7a106d04154c2159db6cd7d042af2e2e0e53aee432f872fe6c8be45100436a"},
2901
+ {file = "tokenizers-0.12.1-cp39-cp39-win_amd64.whl", hash = "sha256:2158baf80cbc09259bfd6e0e0fc4597b611e7a72ad5443dad63918a90f1dd304"},
2902
+ {file = "tokenizers-0.12.1.tar.gz", hash = "sha256:070746f86efa6c873db341e55cf17bb5e7bdd5450330ca8eca542f5c3dab2c66"},
2903
+ ]
2904
  tomli = [
2905
  {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
2906
  {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
 
2909
  {file = "toolz-0.11.2-py3-none-any.whl", hash = "sha256:a5700ce83414c64514d82d60bcda8aabfde092d1c1a8663f9200c07fdcc6da8f"},
2910
  {file = "toolz-0.11.2.tar.gz", hash = "sha256:6b312d5e15138552f1bda8a4e66c30e236c831b612b2bf0005f8a1df10a4bc33"},
2911
  ]
2912
+ torch = [
2913
+ {file = "torch-1.11.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:62052b50fffc29ca7afc0c04ef8206b6f1ca9d10629cb543077e12967e8d0398"},
2914
+ {file = "torch-1.11.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:866bfba29ac98dec35d893d8e17eaec149d0ac7a53be7baae5c98069897db667"},
2915
+ {file = "torch-1.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:951640fb8db308a59d9b510e7d1ad910aff92913323bbe4bc75435347ddd346d"},
2916
+ {file = "torch-1.11.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:5d77b5ece78fdafa5c7f42995ff9474399d22571cd6b2de21a5d666306a2ff8c"},
2917
+ {file = "torch-1.11.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:b5a38682769b544c875ecc34bcb81fbad5c922139b61319aacffcfd8a32f528c"},
2918
+ {file = "torch-1.11.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f82d77695a60626f2b7382d85bc566de8a6b3e50d32080755abc040db802e419"},
2919
+ {file = "torch-1.11.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:b96654d42566080a134e784705f33f8536b3b95b5dcde357ed7879b1692a5f78"},
2920
+ {file = "torch-1.11.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8ee7c2e8d7f7020d5bfbc1bb91b9591044c26bbd0cee5e4f694cfd7ed8649260"},
2921
+ {file = "torch-1.11.0-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:6860b1d1bf0bb0b67a6bd47f85a0e4c825b518eea13b5d6101999dbbcbd5bc0c"},
2922
+ {file = "torch-1.11.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:4322aa29f50da7f404db06cdf30896ea67b09f673af4a985afc7162bc897864d"},
2923
+ {file = "torch-1.11.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e4d2e0ddd652f30e94cff750220324ec45705d4ecc69658f773b3cb1c7a28dd0"},
2924
+ {file = "torch-1.11.0-cp38-cp38-win_amd64.whl", hash = "sha256:34ce5ea4d8d85da32cdbadb50d4585106901e9f8a3527991daa70c13a09de1f7"},
2925
+ {file = "torch-1.11.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:0ccc85cd06227a3edf809e2c795fd5762c3d4e8a38b5c9f744c6e7cf841361bb"},
2926
+ {file = "torch-1.11.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c1554e49d74f1b2c3e7202d77056ba2dd7465437585bac64062b580f714a44e9"},
2927
+ {file = "torch-1.11.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:58c7814502b1c129a650d7092033bbb0bbd64faf1a7941631aaa1aeaddc37570"},
2928
+ {file = "torch-1.11.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:831cf588f01dda9409e75576741d2823453990dee2983d670f2584b37a01adf7"},
2929
+ {file = "torch-1.11.0-cp39-cp39-win_amd64.whl", hash = "sha256:44a1d02fd20f827f0f36dc26fdcfc45e793806a6ad52769a22260655a77a4369"},
2930
+ {file = "torch-1.11.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:50fd9bf85c578c871c28f1cb0ace9dfc6024401c7f399b174fb0f370899f4454"},
2931
+ {file = "torch-1.11.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:0e48af66ad755f0f9c5f2664028a414f57c49d6adc37e77e06fe0004da4edb61"},
2932
+ ]
2933
  tqdm = [
2934
  {file = "tqdm-4.64.0-py2.py3-none-any.whl", hash = "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6"},
2935
  {file = "tqdm-4.64.0.tar.gz", hash = "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d"},
2936
  ]
2937
+ transformers = [
2938
+ {file = "transformers-4.19.4-py3-none-any.whl", hash = "sha256:572d8ecbff29ec53769e0459b4334ebd1038f75ad25119a3006f8816643dccc4"},
2939
+ {file = "transformers-4.19.4.tar.gz", hash = "sha256:b8c0f9816b4c4c2f0265b24d0a0b9d4ae8b7b98fc779d63f92e3c121c4c2d483"},
2940
+ ]
2941
  typer = [
2942
  {file = "typer-0.4.1-py3-none-any.whl", hash = "sha256:e8467f0ebac0c81366c2168d6ad9f888efdfb6d4e1d3d5b4a004f46fa444b5c3"},
2943
  {file = "typer-0.4.1.tar.gz", hash = "sha256:5646aef0d936b2c761a10393f0384ee6b5c7fe0bb3e5cd710b17134ca1d99cff"},
pyproject.toml CHANGED
@@ -8,13 +8,13 @@ license = "Apache 2.0"
8
  [tool.poetry.dependencies]
9
  python = "~3.8"
10
  textacy = "^0.12.0"
11
- hu-core-news-lg = {url = "https://huggingface.co/huspacy/hu_core_news_lg/resolve/v3.3.0/hu_core_news_lg-any-py3-none-any.whl"}
12
  scipy = "~1.8.0"
13
  newspaper3k = "^0.2.8"
14
  gradio = "^3.0.18"
15
  Faker = "^13.13.0"
16
  presidio-analyzer = "^2.2.28"
17
  presidio-anonymizer = "^2.2.28"
 
18
 
19
  [tool.poetry.dev-dependencies]
20
 
 
8
  [tool.poetry.dependencies]
9
  python = "~3.8"
10
  textacy = "^0.12.0"
 
11
  scipy = "~1.8.0"
12
  newspaper3k = "^0.2.8"
13
  gradio = "^3.0.18"
14
  Faker = "^13.13.0"
15
  presidio-analyzer = "^2.2.28"
16
  presidio-anonymizer = "^2.2.28"
17
+ hu-core-news-trf = {url = "https://huggingface.co/huspacy/hu_core_news_trf/resolve/main/hu_core_news_trf-any-py3-none-any.whl"}
18
 
19
  [tool.poetry.dev-dependencies]
20
 
requirements.txt CHANGED
@@ -14,7 +14,7 @@ catalogue==2.0.7; python_version >= "3.8"
14
  certifi==2022.6.15; python_version >= "3.8" and python_version < "4"
15
  cffi==1.15.0; python_version >= "3.7"
16
  charset-normalizer==2.0.12; python_version >= "3.8" and python_version < "4" and python_full_version >= "3.5.0"
17
- click==8.1.3; python_version >= "3.8"
18
  colorama==0.4.5; python_version >= "3.8" and python_full_version < "3.0.0" and platform_system == "Windows" or python_full_version >= "3.5.0" and python_version >= "3.8" and platform_system == "Windows"
19
  cryptography==37.0.2; python_version >= "3.7"
20
  cssselect==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
@@ -26,13 +26,14 @@ fastapi==0.78.0; python_full_version >= "3.6.1" and python_version >= "3.7"
26
  feedfinder2==0.0.4
27
  feedparser==6.0.10; python_version >= "3.6"
28
  ffmpy==0.3.0; python_version >= "3.7"
29
- filelock==3.7.1; python_version >= "3.7"
30
  fonttools==4.33.3; python_version >= "3.7"
31
  frozenlist==1.3.0; python_version >= "3.7"
32
  fsspec==2022.5.0; python_version >= "3.7"
33
  gradio==3.0.18; python_version >= "3.7"
34
  h11==0.13.0; python_version >= "3.7"
35
- hu-core-news-lg @ https://huggingface.co/huspacy/hu_core_news_lg/resolve/v3.3.0/hu_core_news_lg-any-py3-none-any.whl
 
36
  idna==3.3; python_version >= "3.8" and python_version < "4" and python_full_version >= "3.6.2"
37
  jellyfish==0.9.0; python_version >= "3.8"
38
  jieba3k==0.35.1
@@ -55,7 +56,7 @@ newspaper3k==0.2.8
55
  nltk==3.7; python_version >= "3.7"
56
  numpy==1.22.4
57
  orjson==3.7.2; python_version >= "3.7"
58
- packaging==21.3; python_version >= "3.8"
59
  pandas==1.4.2; python_version >= "3.8"
60
  paramiko==2.11.0; python_version >= "3.7"
61
  pathy==0.6.1; python_version >= "3.8"
@@ -74,10 +75,10 @@ pyphen==0.12.0; python_version >= "3.8"
74
  python-dateutil==2.8.2; python_version >= "3.8" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.8"
75
  python-multipart==0.0.5; python_version >= "3.7"
76
  pytz==2022.1; python_version >= "3.8"
77
- pyyaml==6.0; python_version >= "3.6"
78
- regex==2022.6.2; python_version >= "3.7"
79
  requests-file==1.5.1; python_version >= "3.7"
80
- requests==2.28.0; python_version >= "3.8" and python_version < "4"
81
  scikit-learn==1.1.1; python_version >= "3.8"
82
  scipy==1.8.1; python_version >= "3.8" and python_version < "3.11"
83
  setuptools-scm==6.4.2; python_version >= "3.7"
@@ -86,9 +87,12 @@ six==1.16.0; python_version >= "3.7" and python_full_version < "3.0.0" or python
86
  smart-open==5.2.1; python_version >= "3.8" and python_version < "4.0"
87
  sniffio==1.2.0; python_full_version >= "3.6.2" and python_version >= "3.7"
88
  soupsieve==2.3.2.post1; python_version >= "3.6" and python_full_version >= "3.6.0"
 
 
89
  spacy-legacy==3.0.9; python_version >= "3.8"
90
  spacy-loggers==1.0.2; python_version >= "3.8"
91
- spacy==3.3.1; python_version >= "3.8"
 
92
  srsly==2.4.3; python_version >= "3.8"
93
  starlette==0.19.1; python_full_version >= "3.6.1" and python_version >= "3.7"
94
  textacy==0.12.0; python_version >= "3.8"
@@ -96,11 +100,14 @@ thinc==8.0.17; python_version >= "3.8"
96
  threadpoolctl==3.1.0; python_version >= "3.8"
97
  tinysegmenter==0.3
98
  tldextract==3.3.0; python_version >= "3.7"
 
99
  tomli==2.0.1; python_version >= "3.7"
100
  toolz==0.11.2; python_version >= "3.8"
101
- tqdm==4.64.0; python_version >= "3.8" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.8"
 
 
102
  typer==0.4.1; python_version >= "3.8"
103
- typing-extensions==4.2.0; python_full_version >= "3.6.1" and python_version >= "3.8" and python_version < "3.10"
104
  uc-micro-py==1.0.1; python_version >= "3.7"
105
  urllib3==1.26.9; python_version >= "3.8" and python_full_version < "3.0.0" and python_version < "4" or python_full_version >= "3.5.0" and python_version < "4" and python_version >= "3.8"
106
  uvicorn==0.17.6; python_version >= "3.7"
 
14
  certifi==2022.6.15; python_version >= "3.8" and python_version < "4"
15
  cffi==1.15.0; python_version >= "3.7"
16
  charset-normalizer==2.0.12; python_version >= "3.8" and python_version < "4" and python_full_version >= "3.5.0"
17
+ click==8.0.4; python_version >= "3.8"
18
  colorama==0.4.5; python_version >= "3.8" and python_full_version < "3.0.0" and platform_system == "Windows" or python_full_version >= "3.5.0" and python_version >= "3.8" and platform_system == "Windows"
19
  cryptography==37.0.2; python_version >= "3.7"
20
  cssselect==1.1.0; python_version >= "2.7" and python_full_version < "3.0.0" or python_full_version >= "3.4.0"
 
26
  feedfinder2==0.0.4
27
  feedparser==6.0.10; python_version >= "3.6"
28
  ffmpy==0.3.0; python_version >= "3.7"
29
+ filelock==3.7.1; python_full_version >= "3.7.0" and python_version >= "3.7"
30
  fonttools==4.33.3; python_version >= "3.7"
31
  frozenlist==1.3.0; python_version >= "3.7"
32
  fsspec==2022.5.0; python_version >= "3.7"
33
  gradio==3.0.18; python_version >= "3.7"
34
  h11==0.13.0; python_version >= "3.7"
35
+ hu-core-news-trf @ https://huggingface.co/huspacy/hu_core_news_trf/resolve/main/hu_core_news_trf-any-py3-none-any.whl
36
+ huggingface-hub==0.8.1; python_full_version >= "3.7.0" and python_version >= "3.6"
37
  idna==3.3; python_version >= "3.8" and python_version < "4" and python_full_version >= "3.6.2"
38
  jellyfish==0.9.0; python_version >= "3.8"
39
  jieba3k==0.35.1
 
56
  nltk==3.7; python_version >= "3.7"
57
  numpy==1.22.4
58
  orjson==3.7.2; python_version >= "3.7"
59
+ packaging==21.3; python_full_version >= "3.7.0" and python_version >= "3.8"
60
  pandas==1.4.2; python_version >= "3.8"
61
  paramiko==2.11.0; python_version >= "3.7"
62
  pathy==0.6.1; python_version >= "3.8"
 
75
  python-dateutil==2.8.2; python_version >= "3.8" and python_full_version < "3.0.0" or python_full_version >= "3.3.0" and python_version >= "3.8"
76
  python-multipart==0.0.5; python_version >= "3.7"
77
  pytz==2022.1; python_version >= "3.8"
78
+ pyyaml==6.0; python_full_version >= "3.7.0" and python_version >= "3.6"
79
+ regex==2022.6.2; python_full_version >= "3.7.0" and python_version >= "3.7"
80
  requests-file==1.5.1; python_version >= "3.7"
81
+ requests==2.28.0; python_version >= "3.8" and python_version < "4" and python_full_version >= "3.7.0"
82
  scikit-learn==1.1.1; python_version >= "3.8"
83
  scipy==1.8.1; python_version >= "3.8" and python_version < "3.11"
84
  setuptools-scm==6.4.2; python_version >= "3.7"
 
87
  smart-open==5.2.1; python_version >= "3.8" and python_version < "4.0"
88
  sniffio==1.2.0; python_full_version >= "3.6.2" and python_version >= "3.7"
89
  soupsieve==2.3.2.post1; python_version >= "3.6" and python_full_version >= "3.6.0"
90
+ spacy-alignments==0.8.5; python_version >= "3.6"
91
+ spacy-experimental==0.4.0; python_version >= "3.6"
92
  spacy-legacy==3.0.9; python_version >= "3.8"
93
  spacy-loggers==1.0.2; python_version >= "3.8"
94
+ spacy-transformers==1.1.6; python_version >= "3.6"
95
+ spacy==3.2.4; python_version >= "3.8"
96
  srsly==2.4.3; python_version >= "3.8"
97
  starlette==0.19.1; python_full_version >= "3.6.1" and python_version >= "3.7"
98
  textacy==0.12.0; python_version >= "3.8"
 
100
  threadpoolctl==3.1.0; python_version >= "3.8"
101
  tinysegmenter==0.3
102
  tldextract==3.3.0; python_version >= "3.7"
103
+ tokenizers==0.12.1; python_full_version >= "3.7.0" and python_version >= "3.6"
104
  tomli==2.0.1; python_version >= "3.7"
105
  toolz==0.11.2; python_version >= "3.8"
106
+ torch==1.11.0; python_full_version >= "3.7.0" and python_version >= "3.6"
107
+ tqdm==4.64.0; python_full_version >= "3.7.0" and python_version >= "3.8"
108
+ transformers==4.19.4; python_full_version >= "3.7.0" and python_version >= "3.6"
109
  typer==0.4.1; python_version >= "3.8"
110
+ typing-extensions==4.2.0; python_full_version >= "3.7.0" and python_version >= "3.8" and python_version < "3.10"
111
  uc-micro-py==1.0.1; python_version >= "3.7"
112
  urllib3==1.26.9; python_version >= "3.8" and python_full_version < "3.0.0" and python_version < "4" or python_full_version >= "3.5.0" and python_version < "4" and python_version >= "3.8"
113
  uvicorn==0.17.6; python_version >= "3.7"
resources/triples.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Triples
3
+ -------
4
+
5
+ :mod:`textacy.extract.triples`: Extract structured triples from a document or sentence
6
+ through rule-based pattern-matching of the annotated tokens.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import collections
11
+ from operator import attrgetter
12
+ from typing import Iterable, List, Tuple
13
+
14
+ from spacy.symbols import (
15
+ AUX, VERB,
16
+ agent, attr, aux, auxpass, csubj, csubjpass, dobj, neg, nsubj, nsubjpass, obj, pobj, xcomp,
17
+ )
18
+ from spacy.tokens import Span, Token
19
+
20
+ from textacy import types
21
+
22
+
23
+ _NOMINAL_SUBJ_DEPS = {nsubj, nsubjpass}
24
+ _CLAUSAL_SUBJ_DEPS = {csubj, csubjpass}
25
+ _ACTIVE_SUBJ_DEPS = {csubj, nsubj}
26
+ _VERB_MODIFIER_DEPS = {aux, auxpass, neg}
27
+
28
+ SVOTriple: Tuple[List[Token], List[Token], List[Token]] = collections.namedtuple(
29
+ "SVOTriple", ["subject", "verb", "object"]
30
+ )
31
+
32
+
33
def subject_verb_object_triples(doclike: types.DocLike) -> Iterable[SVOTriple]:
    """
    Yield subject-verb-object triples found in a document or a single span.

    Args:
        doclike: A ``Span``, which is handled as one sentence, or any object
            exposing ``.sents``, whose sentences are handled one after another.

    Yields:
        Next SVO triple as (subject, verb, object), in approximate order of
        appearance. Each element is a list of tokens sorted by token index;
        a verb only produces a triple when it ends up with at least one
        subject token and at least one object token.
    """
    sentences = [doclike] if isinstance(doclike, Span) else doclike.sents
    by_index = attrgetter("i")

    for sentence in sentences:
        # verb token -> {"subjects": set of tokens, "objects": set of tokens}
        roles_by_verb = collections.defaultdict(
            lambda: collections.defaultdict(set)
        )

        # Pass 1: attach subjects/objects to the verb that directly governs them,
        # widening nouns to their conjuncts and compound modifiers.
        for token in sentence:
            governor = token.head
            # touch the mapping so every verb has an entry, even an empty one;
            # conjunct verbs without direct subject/object deps rely on this
            if token.pos == VERB:
                _ = roles_by_verb[token]

            relation = token.dep
            if relation in _NOMINAL_SUBJ_DEPS:
                # nominal subject of an active or passive verb
                if governor.pos == VERB:
                    roles_by_verb[governor]["subjects"].update(expand_noun(token))
            elif relation in _CLAUSAL_SUBJ_DEPS:
                # clausal subject: the whole clause acts as the subject
                if governor.pos == VERB:
                    roles_by_verb[governor]["subjects"].update(token.subtree)
            elif relation == obj:
                # nominal direct object of a transitive verb
                if governor.pos == VERB:
                    roles_by_verb[governor]["objects"].update(expand_noun(token))
            elif relation == pobj:
                # prepositional object acting as the agent of a passive verb
                if governor.dep == agent and governor.head.pos == VERB:
                    roles_by_verb[governor.head]["objects"].update(
                        expand_noun(token)
                    )
            elif relation == xcomp:
                # open clausal complement, but not as a secondary predicate:
                # skip it when the governing verb already has a direct object
                if governor.pos == VERB and all(
                    sibling.dep != obj for sibling in governor.children
                ):
                    # TODO: just the verb, or the whole tree?
                    roles_by_verb[governor]["objects"].update(token.subtree)

        # Pass 2: share subjects/objects between verbs linked by conjunction.
        for verb, roles in roles_by_verb.items():
            linked_verbs = verb.conjuncts
            if roles.get("subjects"):
                for linked in linked_verbs:
                    linked_roles = roles_by_verb.get(linked)
                    # an unknown verb or one with an empty entry is skipped,
                    # as is a verb that already has its own subjects
                    if not linked_roles or linked_roles.get("subjects"):
                        continue
                    linked_roles["subjects"].update(roles["subjects"])
            if not roles.get("objects"):
                borrowed = roles["objects"]
                for linked in linked_verbs:
                    borrowed.update(
                        roles_by_verb.get(linked, {}).get("objects", [])
                    )

        # Pass 3: expand each verb and emit the complete triples.
        for verb, roles in roles_by_verb.items():
            if not (roles["subjects"] and roles["objects"]):
                continue
            yield SVOTriple(
                subject=sorted(roles["subjects"], key=by_index),
                verb=sorted(expand_verb(verb), key=by_index),
                object=sorted(roles["objects"], key=by_index),
            )
106
+
107
def expand_noun(tok: Token) -> List[Token]:
    """
    Collect a noun together with its conjuncts and their compound modifiers.

    Args:
        tok: Noun token to expand.

    Returns:
        ``tok`` itself, followed by its conjuncts, followed by every child of
        any of those tokens whose dependency label is ``"compound"``.
    """
    nouns = [tok, *tok.conjuncts]
    expanded = list(nouns)
    for noun in nouns:
        for child in noun.children:
            # TODO: why doesn't compound import from spacy.symbols?
            # (hence the comparison against the string label)
            if child.dep_ == "compound":
                expanded.append(child)
    return expanded
118
+
119
+
120
def expand_verb(tok: Token) -> List[Token]:
    """
    Collect a verb together with its auxiliary and negation children.

    Args:
        tok: Verb token to expand.

    Returns:
        ``tok`` itself, followed by each of its children whose dependency
        relation is listed in ``_VERB_MODIFIER_DEPS``, in child order.
    """
    expanded = [tok]
    expanded.extend(
        child for child in tok.children if child.dep in _VERB_MODIFIER_DEPS
    )
    return expanded