apjanco committed on
Commit
26e1bea
1 Parent(s): 7136222

add support for scispacy

Browse files
Files changed (4) hide show
  1. app.py +13 -6
  2. models.json +13 -1
  3. requirements.txt +137 -2
  4. scispacy.json +10 -0
app.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  import textract
3
  import tempfile
4
  import spacy
 
 
5
  from spacy.tokens import DocBin, Doc, Span
6
  from collections import Counter
7
  import srsly
@@ -16,12 +18,17 @@ if 'query' not in st.session_state:
16
  st.session_state['query'] = ''
17
 
18
  @st.cache
19
- def download_model(select_model:str):
20
- try:
21
- spacy.cli.download(select_model)
 
22
  return True
23
- except Exception as e:
24
- return False
 
 
 
 
25
 
26
  def search_docs(query:str, documents:list[Doc], nlp) -> list[Span]:
27
  terms = query.split('|')
@@ -47,7 +54,7 @@ language = st.selectbox("Language", languages, index=len(models.keys())-1, help=
47
  if language:
48
  select_model = st.selectbox("Model", models[language], help="spaCy model")
49
  if select_model:
50
- model_downloaded = download_model(select_model)
51
 
52
  if model_downloaded:
53
 
 
2
  import textract
3
  import tempfile
4
  import spacy
5
+ import subprocess
6
+ import scispacy
7
  from spacy.tokens import DocBin, Doc, Span
8
  from collections import Counter
9
  import srsly
 
18
  st.session_state['query'] = ''
19
 
20
  @st.cache
21
+ def download_model(language:str, select_model:str):
22
+ if language == 'Science':
23
+ urls = srsly.read_json('scispacy.json')
24
+ subprocess.run(['pip', 'install', f'{urls[select_model]}'])
25
  return True
26
+ else:
27
+ try:
28
+ spacy.cli.download(select_model)
29
+ return True
30
+ except Exception as e:
31
+ return False
32
 
33
  def search_docs(query:str, documents:list[Doc], nlp) -> list[Span]:
34
  terms = query.split('|')
 
54
  if language:
55
  select_model = st.selectbox("Model", models[language], help="spaCy model")
56
  if select_model:
57
+ model_downloaded = download_model(language, select_model)
58
 
59
  if model_downloaded:
60
 
models.json CHANGED
@@ -144,6 +144,18 @@
144
  "sv_core_news_md",
145
  "sv_core_news_lg"
146
 
147
- ]
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  }
 
144
  "sv_core_news_md",
145
  "sv_core_news_lg"
146
 
147
+ ],
148
+ "Science":[
149
+ "",
150
+ "en_core_sci_sm",
151
+ "en_core_sci_md",
152
+ "en_ner_craft_md",
153
+ "en_ner_jnlpba_md",
154
+ "en_ner_bc5cdr_md",
155
+ "en_ner_bionlp13cg_md",
156
+ "en_core_sci_lg",
157
+ "en_core_sci_scibert"
158
+
159
+ ]
160
 
161
  }
requirements.txt CHANGED
@@ -1,4 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  streamlit==1.11.1
 
2
  textract==1.6.5
3
- spacy==3.4.1
4
- #spacy-streamlit==1.0.4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==4.2.0
2
+ argcomplete==1.10.3
3
+ argon2-cffi==21.3.0
4
+ argon2-cffi-bindings==21.2.0
5
+ asttokens==2.0.7
6
+ attrs==22.1.0
7
+ backcall==0.2.0
8
+ beautifulsoup4==4.8.2
9
+ bleach==5.0.1
10
+ blinker==1.5
11
+ blis==0.7.8
12
+ cachetools==5.2.0
13
+ catalogue==2.0.8
14
+ certifi==2022.6.15
15
+ cffi==1.15.1
16
+ chardet==3.0.4
17
+ charset-normalizer==2.1.0
18
+ click==8.0.4
19
+ commonmark==0.9.1
20
+ compressed-rtf==1.0.6
21
+ conllu==4.5.2
22
+ cymem==2.0.6
23
+ debugpy==1.6.2
24
+ decorator==5.1.1
25
+ defusedxml==0.7.1
26
+ docx2txt==0.8
27
+ ebcdic==1.1.1
28
+ en-core-sci-sm==0.5.0
29
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl
30
+ entrypoints==0.4
31
+ executing==0.9.1
32
+ extract-msg==0.28.7
33
+ fastjsonschema==2.16.1
34
+ gitdb==4.0.9
35
+ GitPython==3.1.27
36
+ idna==3.3
37
+ IMAPClient==2.1.0
38
+ importlib-metadata==4.12.0
39
+ ipykernel==6.15.1
40
+ ipython==8.4.0
41
+ ipython-genutils==0.2.0
42
+ ipywidgets==7.7.1
43
+ jedi==0.18.1
44
+ Jinja2==3.1.2
45
+ joblib==1.1.0
46
+ jsonschema==4.9.1
47
+ jupyter-client==7.3.4
48
+ jupyter-core==4.11.1
49
+ jupyterlab-pygments==0.2.2
50
+ jupyterlab-widgets==1.1.1
51
+ langcodes==3.3.0
52
+ lxml==4.9.1
53
+ MarkupSafe==2.1.1
54
+ matplotlib-inline==0.1.3
55
+ mistune==0.8.4
56
+ murmurhash==1.0.7
57
+ nbclient==0.6.6
58
+ nbconvert==6.5.1
59
+ nbformat==5.4.0
60
+ nest-asyncio==1.5.5
61
+ nmslib==2.1.1
62
+ notebook==6.4.12
63
+ numpy==1.23.1
64
+ olefile==0.46
65
+ packaging==21.3
66
+ pandas==1.4.3
67
+ pandocfilters==1.5.0
68
+ parso==0.8.3
69
+ pathy==0.6.2
70
+ pdfminer.six==20191110
71
+ pexpect==4.8.0
72
+ pickleshare==0.7.5
73
+ Pillow==9.2.0
74
+ preshed==3.0.6
75
+ prometheus-client==0.14.1
76
+ prompt-toolkit==3.0.30
77
+ protobuf==3.20.1
78
+ psutil==5.9.1
79
+ ptyprocess==0.7.0
80
+ pure-eval==0.2.2
81
+ pyarrow==9.0.0
82
+ pybind11==2.6.1
83
+ pycparser==2.21
84
+ pycryptodome==3.15.0
85
+ pydantic==1.8.2
86
+ pydeck==0.7.1
87
+ Pygments==2.12.0
88
+ Pympler==1.0.1
89
+ pyparsing==3.0.9
90
+ pyrsistent==0.18.1
91
+ pysbd==0.3.4
92
+ python-dateutil==2.8.2
93
+ python-pptx==0.6.21
94
+ pytz==2022.1
95
+ pytz-deprecation-shim==0.1.0.post0
96
+ pyzmq==23.2.0
97
+ requests==2.28.1
98
+ rich==12.5.1
99
+ scikit-learn==1.1.2
100
+ scipy==1.9.0
101
+ scispacy==0.5.0
102
+ semver==2.13.0
103
+ Send2Trash==1.8.0
104
+ six==1.12.0
105
+ smart-open==5.2.1
106
+ smmap==5.0.0
107
+ sortedcontainers==2.4.0
108
+ soupsieve==2.3.2.post1
109
+ spacy==3.2.4
110
+ spacy-legacy==3.0.9
111
+ spacy-loggers==1.0.3
112
+ SpeechRecognition==3.8.1
113
+ srsly==2.4.4
114
+ stack-data==0.3.0
115
  streamlit==1.11.1
116
+ terminado==0.15.0
117
  textract==1.6.5
118
+ thinc==8.0.17
119
+ threadpoolctl==3.1.0
120
+ tinycss2==1.1.1
121
+ toml==0.10.2
122
+ toolz==0.12.0
123
+ tornado==6.2
124
+ tqdm==4.64.0
125
+ traitlets==5.3.0
126
+ typer==0.4.2
127
+ typing_extensions==4.3.0
128
+ tzdata==2022.1
129
+ tzlocal==4.2
130
+ urllib3==1.26.11
131
+ validators==0.20.0
132
+ wasabi==0.10.1
133
+ watchdog==2.1.9
134
+ wcwidth==0.2.5
135
+ webencodings==0.5.1
136
+ widgetsnbextension==3.6.1
137
+ xlrd==1.2.0
138
+ XlsxWriter==3.0.3
139
+ zipp==3.8.1
scispacy.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "en_core_sci_sm": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_sm-0.5.0.tar.gz",
3
+ "en_core_sci_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_md-0.5.0.tar.gz",
4
+ "en_ner_craft_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_craft_md-0.5.0.tar.gz",
5
+ "en_ner_jnlpba_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_jnlpba_md-0.5.0.tar.gz",
6
+ "en_ner_bc5cdr_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bc5cdr_md-0.5.0.tar.gz",
7
+ "en_ner_bionlp13cg_md": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_ner_bionlp13cg_md-0.5.0.tar.gz",
8
+ "en_core_sci_lg": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_lg-0.5.0.tar.gz",
9
+ "en_core_sci_scibert": "https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_scibert-0.5.0.tar.gz"
10
+ }