Spaces:
Running
Running
roni
committed on
Commit
·
27e2770
1
Parent(s):
35d4339
initial protein search engine implementation
Browse files- .gitignore +4 -0
- Makefile +15 -0
- __init__.py +0 -0
- app.py +36 -0
- credentials.py +5 -0
- get_index.py +23 -0
- pylintrc +20 -0
- requirements-dev.txt +5 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.idea
|
2 |
+
venv
|
3 |
+
__pycache__
|
4 |
+
scratch
|
Makefile
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
setup:
|
2 |
+
python -m venv venv
|
3 |
+
venv/bin/pip install -r requirements.txt
|
4 |
+
venv/bin/pip install -r requirements-dev.txt
|
5 |
+
|
6 |
+
format:
|
7 |
+
venv/bin/black .
|
8 |
+
|
9 |
+
check-code: lint-python check-formatting
|
10 |
+
|
11 |
+
check-formatting:
|
12 |
+
venv/bin/black --check .
|
13 |
+
|
14 |
+
lint-python:
|
15 |
+
venv/bin/pylint --rcfile=pylintrc .
|
__init__.py
ADDED
File without changes
|
app.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
from get_index import get_engine
|
4 |
+
|
5 |
+
index_repo = "ronig/siamese_protein_index"
|
6 |
+
model_repo = "ronig/protein_search_engine"
|
7 |
+
engine = get_engine(index_repo, model_repo)
|
8 |
+
|
9 |
+
|
10 |
+
def search(seq, n_res):
    """Query the module-level search engine and label each hit with its score.

    Args:
        seq: Sequence passed unchanged to ``engine.search_by_sequence``.
        n_res: Requested number of results; clamped via ``limit_n_results``
            and converted to ``int`` before being sent to the engine.

    Returns:
        A dict mapping ``"Protein: <name> | Chain: <chain>"`` to the hit's
        ``"score"`` value. Hits that produce the same label overwrite earlier
        ones.
    """
    limit = int(limit_n_results(n_res))
    hits = engine.search_by_sequence(seq, n=limit)

    labelled_scores = {}
    for hit in hits:
        # Only the first element of each field is used.
        # NOTE(review): presumably these fields are sequences/batches — confirm
        # against the engine's result schema.
        prot, chain = hit["protein_name"][0], hit["chain_id"][0]
        labelled_scores[f"Protein: {prot} | Chain: {chain}"] = hit["score"]
    return labelled_scores
|
21 |
+
|
22 |
+
|
23 |
+
def limit_n_results(n):
    """Clamp ``n`` to the inclusive range 1..20.

    Values inside the range are returned unchanged (including their numeric
    type); values outside are replaced by the nearest bound.
    """
    lowest, highest = 1, 20
    if n > highest:
        return highest
    if n < lowest:
        return lowest
    return n
|
25 |
+
|
26 |
+
|
27 |
+
with gr.Blocks() as demo:
|
28 |
+
with gr.Row():
|
29 |
+
with gr.Column():
|
30 |
+
seq_input = gr.Textbox("KFLIYQMECSTMIFGL", label="Input Sequence")
|
31 |
+
n_results = gr.Number(5, label="N Results")
|
32 |
+
search_button = gr.Button("Search")
|
33 |
+
output = gr.Label(num_top_classes=20, label="Search Results")
|
34 |
+
search_button.click(search, inputs=[seq_input, n_results], outputs=output)
|
35 |
+
|
36 |
+
demo.launch()
|
credentials.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
|
4 |
+
def get_token():
    """Return the ``TOKEN`` environment variable, or ``True`` when it is unset.

    NOTE(review): the ``True`` fallback is presumably meant to make the
    Hugging Face client use locally stored credentials — confirm against the
    ``use_auth_token`` semantics of the installed huggingface_hub version.
    """
    try:
        return os.environ["TOKEN"]
    except KeyError:
        return True
|
get_index.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
from huggingface_hub import snapshot_download
|
5 |
+
|
6 |
+
from credentials import get_token
|
7 |
+
|
8 |
+
|
9 |
+
def get_engine(index_repo: str, model_repo: str):
    """Download the index and model snapshots and build a ``ProteinSearchEngine``.

    Args:
        index_repo: Hub repository id fetched with ``repo_type="dataset"``.
        model_repo: Hub repository id fetched with ``repo_type="model"``.

    Returns:
        A ``ProteinSearchEngine`` constructed with ``data_path`` set to the
        local path of the downloaded index snapshot.
    """
    index_snapshot = snapshot_download(
        index_repo, use_auth_token=get_token(), repo_type="dataset"
    )
    index_path = Path(index_snapshot)

    model_snapshot = snapshot_download(
        model_repo, use_auth_token=get_token(), repo_type="model"
    )
    local_arch_path = Path(model_snapshot)

    # The model snapshot directory is put on sys.path so that the
    # ``protein_index`` module can be imported below; hence the late import.
    sys.path.append(str(local_arch_path))
    from protein_index import ProteinSearchEngine  # pylint: disable=import-error,import-outside-toplevel

    return ProteinSearchEngine(data_path=index_path)
|
pylintrc
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[MESSAGES CONTROL]
|
2 |
+
disable=missing-docstring,invalid-name,logging-fstring-interpolation
|
3 |
+
|
4 |
+
[DESIGN]
|
5 |
+
min-public-methods=1
|
6 |
+
|
7 |
+
[FORMAT]
|
8 |
+
max-line-length=88
|
9 |
+
|
10 |
+
[SIMILARITIES]
|
11 |
+
min-similarity-lines=10
|
12 |
+
|
13 |
+
[TYPECHECK]
|
14 |
+
|
15 |
+
[MASTER]
|
16 |
+
init-hook=import sys; sys.path.append(".")
|
17 |
+
extension-pkg-whitelist=pydantic,cassandra
|
18 |
+
generated-members=torch.*,cv2.*,np.random.*
|
19 |
+
ignore-patterns=setup,py,tasks.py
|
20 |
+
max-args=6
|
requirements-dev.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pytest
|
2 |
+
pylint
|
3 |
+
black
|
4 |
+
mypy
|
5 |
+
huggingface_hub
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
transformers
|
3 |
+
annoy
|