Martino Mensio committed on
Commit
d6504ae
•
1 Parent(s): e8ef0d6

updated to spacy v3:

Browse files

- pipeline stage added by name
- declared entry_point to shorten set-up
- changed directory name to match package name

.gitignore CHANGED
@@ -2,4 +2,143 @@ data/*
2
  .idea
3
  *.log
4
  .ipynb_checkpoints
5
- data_spacy_entity_linker
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  .idea
3
  *.log
4
  .ipynb_checkpoints
5
+ data_spacy_entity_linker
6
+
7
+ # Byte-compiled / optimized / DLL files
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+
12
+ # C extensions
13
+ *.so
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+ cover/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ .pybuilder/
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ # For a library or package, you might want to ignore these files since the code is
93
+ # intended to run in multiple environments; otherwise, check them in:
94
+ # .python-version
95
+
96
+ # pipenv
97
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
99
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
100
+ # install all needed dependencies.
101
+ #Pipfile.lock
102
+
103
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
104
+ __pypackages__/
105
+
106
+ # Celery stuff
107
+ celerybeat-schedule
108
+ celerybeat.pid
109
+
110
+ # SageMath parsed files
111
+ *.sage.py
112
+
113
+ # Environments
114
+ .env
115
+ .venv
116
+ env/
117
+ venv/
118
+ ENV/
119
+ env.bak/
120
+ venv.bak/
121
+
122
+ # Spyder project settings
123
+ .spyderproject
124
+ .spyproject
125
+
126
+ # Rope project settings
127
+ .ropeproject
128
+
129
+ # mkdocs documentation
130
+ /site
131
+
132
+ # mypy
133
+ .mypy_cache/
134
+ .dmypy.json
135
+ dmypy.json
136
+
137
+ # Pyre type checker
138
+ .pyre/
139
+
140
+ # pytype static type analyzer
141
+ .pytype/
142
+
143
+ # Cython debug symbols
144
+ cython_debug/
README.md CHANGED
@@ -22,16 +22,12 @@ It also comes along with a number of disadvantages:
22
  ## Use
23
  ```python
24
  import spacy
25
- from spacyEntityLinker import EntityLinker
26
-
27
- #Initialize Entity Linker
28
- entityLinker = EntityLinker()
29
 
30
  #initialize language model
31
  nlp = spacy.load("en_core_web_sm")
32
 
33
- #add pipeline
34
- nlp.add_pipe(entityLinker, last=True, name="entityLinker")
35
 
36
  doc = nlp("I watched the Pirates of the Carribean last silvester")
37
 
@@ -135,7 +131,7 @@ To install the package run: <code>pip install spacy-entity-linker</code>
135
 
136
  Afterwards, the knowledge base (Wikidata) must be downloaded. This can be done by calling
137
 
138
- <code>python -m spacyEntityLinker "download_knowledge_base"</code>
139
 
140
  This will download and extract a ~500mb file that contains a preprocessed version of Wikidata
141
 
 
22
  ## Use
23
  ```python
24
  import spacy
 
 
 
 
25
 
26
  #initialize language model
27
  nlp = spacy.load("en_core_web_sm")
28
 
29
+ #add pipeline (declared through entry_points in setup.py)
30
+ nlp.add_pipe("entityLinker", last=True)
31
 
32
  doc = nlp("I watched the Pirates of the Carribean last silvester")
33
 
 
131
 
132
  Afterwards, the knowledge base (Wikidata) must be downloaded. This can be done by calling
133
 
134
+ <code>python -m spacy_entity_linker "download_knowledge_base"</code>
135
 
136
  This will download and extract a ~500mb file that contains a preprocessed version of Wikidata
137
 
requirements.txt CHANGED
@@ -1 +1 @@
1
- spacy>=2.1.9
 
1
+ -e .
setup.py CHANGED
@@ -5,7 +5,6 @@
5
 
6
  # This file is part of fuzzywuzzy.
7
 
8
- from spacyEntityLinker import __version__
9
  import os
10
 
11
  try:
@@ -23,10 +22,10 @@ with open("README.md", "r") as fh:
23
 
24
  setup(
25
  name='spacy-entity-linker',
26
- version=__version__,
27
  author='Emanuel Gerber',
28
  author_email='emanuel.j.gerber@gmail.com',
29
- packages=['spacyEntityLinker'],
30
  url='https://github.com/egerber/spacy-entity-linker',
31
  license="MIT",
32
  classifiers=["Environment :: Console",
@@ -45,4 +44,11 @@ setup(
45
  long_description=long_description,
46
  long_description_content_type="text/markdown",
47
  zip_safe=True,
 
 
 
 
 
 
 
48
  )
 
5
 
6
  # This file is part of fuzzywuzzy.
7
 
 
8
  import os
9
 
10
  try:
 
22
 
23
  setup(
24
  name='spacy-entity-linker',
25
+ version='0.0.6',
26
  author='Emanuel Gerber',
27
  author_email='emanuel.j.gerber@gmail.com',
28
+ packages=['spacy_entity_linker'],
29
  url='https://github.com/egerber/spacy-entity-linker',
30
  license="MIT",
31
  classifiers=["Environment :: Console",
 
44
  long_description=long_description,
45
  long_description_content_type="text/markdown",
46
  zip_safe=True,
47
+ install_requires=[
48
+ 'spacy>=3.0.0',
49
+ 'numpy>=1.0.0'
50
+ ],
51
+ entry_points={
52
+ 'spacy_factories': 'entityLinker = spacy_entity_linker.EntityLinker:EntityLinker'
53
+ }
54
  )
spacyEntityLinker/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- from .EntityLinker import EntityLinker
2
-
3
- __version__ = '0.0.5'
4
- __all__ = [EntityLinker]
 
 
 
 
 
{spacyEntityLinker → spacy_entity_linker}/DatabaseConnection.py RENAMED
File without changes
{spacyEntityLinker → spacy_entity_linker}/EntityCandidates.py RENAMED
File without changes
{spacyEntityLinker → spacy_entity_linker}/EntityClassifier.py RENAMED
File without changes
{spacyEntityLinker → spacy_entity_linker}/EntityCollection.py RENAMED
@@ -1,5 +1,5 @@
1
  from collections import Counter, defaultdict
2
- from spacyEntityLinker.DatabaseConnection import get_wikidata_instance
3
 
4
 
5
  class EntityCollection:
 
1
  from collections import Counter, defaultdict
2
+ from .DatabaseConnection import get_wikidata_instance
3
 
4
 
5
  class EntityCollection:
{spacyEntityLinker → spacy_entity_linker}/EntityElement.py RENAMED
@@ -1,5 +1,5 @@
1
- from spacyEntityLinker.DatabaseConnection import get_wikidata_instance
2
- from spacyEntityLinker.EntityCollection import EntityCollection
3
 
4
 
5
  class EntityElement:
 
1
+ from .DatabaseConnection import get_wikidata_instance
2
+ from .EntityCollection import EntityCollection
3
 
4
 
5
  class EntityElement:
{spacyEntityLinker → spacy_entity_linker}/EntityLinker.py RENAMED
@@ -1,12 +1,14 @@
1
- from spacyEntityLinker.EntityClassifier import EntityClassifier
2
- from spacyEntityLinker.EntityCollection import EntityCollection
3
- from spacyEntityLinker.TermCandidateExtractor import TermCandidateExtractor
4
  from spacy.tokens import Doc, Span
 
5
 
 
 
 
6
 
 
7
  class EntityLinker:
8
 
9
- def __init__(self):
10
  Doc.set_extension("linkedEntities", default=EntityCollection(), force=True)
11
  Span.set_extension("linkedEntities", default=None, force=True)
12
 
 
 
 
 
1
  from spacy.tokens import Doc, Span
2
+ from spacy.language import Language
3
 
4
+ from .EntityClassifier import EntityClassifier
5
+ from .EntityCollection import EntityCollection
6
+ from .TermCandidateExtractor import TermCandidateExtractor
7
 
8
+ @Language.factory('entityLinker')
9
  class EntityLinker:
10
 
11
+ def __init__(self, nlp, name):
12
  Doc.set_extension("linkedEntities", default=EntityCollection(), force=True)
13
  Span.set_extension("linkedEntities", default=None, force=True)
14
 
{spacyEntityLinker → spacy_entity_linker}/TermCandidate.py RENAMED
@@ -1,6 +1,6 @@
1
- from spacyEntityLinker.EntityCandidates import EntityCandidates
2
- from spacyEntityLinker.EntityElement import EntityElement
3
- from spacyEntityLinker.DatabaseConnection import get_wikidata_instance
4
 
5
 
6
  class TermCandidate:
 
1
+ from .EntityCandidates import EntityCandidates
2
+ from .EntityElement import EntityElement
3
+ from .DatabaseConnection import get_wikidata_instance
4
 
5
 
6
  class TermCandidate:
{spacyEntityLinker → spacy_entity_linker}/TermCandidateExtractor.py RENAMED
@@ -1,4 +1,4 @@
1
- from spacyEntityLinker.TermCandidate import TermCandidate
2
 
3
 
4
  class TermCandidateExtractor:
 
1
+ from .TermCandidate import TermCandidate
2
 
3
 
4
  class TermCandidateExtractor:
spacy_entity_linker/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ try: # Python 3.8
2
+ import importlib.metadata as importlib_metadata
3
+ except ImportError:
4
+ import importlib_metadata # noqa: F401
5
+
6
+ from .EntityLinker import EntityLinker
7
+
8
+ print(__name__)
9
+ pkg_meta = importlib_metadata.metadata(__name__.split(".")[0])
10
+ __version__ = pkg_meta["version"]
11
+ __all__ = [EntityLinker]
{spacyEntityLinker → spacy_entity_linker}/__main__.py RENAMED
File without changes
tests/test_EntityLinker.py CHANGED
@@ -1,6 +1,6 @@
1
  import unittest
2
  import spacy
3
- from spacyEntityLinker.EntityLinker import EntityLinker
4
 
5
 
6
  class TestEntityLinker(unittest.TestCase):
@@ -10,9 +10,8 @@ class TestEntityLinker(unittest.TestCase):
10
  self.nlp = spacy.load('en_core_web_sm')
11
 
12
  def test_initialization(self):
13
- entityLinker = EntityLinker()
14
 
15
- self.nlp.add_pipe(entityLinker, last=True, name="entityLinker")
16
 
17
  doc = self.nlp(
18
  "Elon Musk was born in South Africa. Bill Gates and Steve Jobs come from in the United States")
 
1
  import unittest
2
  import spacy
3
+ from spacy_entity_linker.EntityLinker import EntityLinker
4
 
5
 
6
  class TestEntityLinker(unittest.TestCase):
 
10
  self.nlp = spacy.load('en_core_web_sm')
11
 
12
  def test_initialization(self):
 
13
 
14
+ self.nlp.add_pipe("entityLinker", last=True)
15
 
16
  doc = self.nlp(
17
  "Elon Musk was born in South Africa. Bill Gates and Steve Jobs come from in the United States")
tests/test_TermCandidateExtractor.py CHANGED
@@ -1,6 +1,6 @@
1
  import unittest
2
  import spacy
3
- import spacyEntityLinker.TermCandidateExtractor
4
 
5
 
6
  class TestCandidateExtractor(unittest.TestCase):
 
1
  import unittest
2
  import spacy
3
+ import spacy_entity_linker.TermCandidateExtractor
4
 
5
 
6
  class TestCandidateExtractor(unittest.TestCase):