Spaces:
Running
Running
Pass URI through GET config
Browse files- Dockerfile +1 -1
- app/main.py +40 -32
Dockerfile
CHANGED
@@ -54,4 +54,4 @@ EXPOSE 7681
|
|
54 |
# Set the working directory where your app resides
|
55 |
|
56 |
# Command to run the Gradio app automatically
|
57 |
-
CMD ["python", "app/main.py", "-p", "7681", "-s", "-
|
|
|
54 |
# Set the working directory where your app resides
|
55 |
|
56 |
# Command to run the Gradio app automatically
|
57 |
+
CMD ["python", "app/main.py", "-p", "7681", "-s", "-u", "s3://2023-get-xf2217/get_demo", "-d", "/data"]
|
app/main.py
CHANGED
@@ -6,34 +6,31 @@ import matplotlib.pyplot as plt
|
|
6 |
import pandas as pd
|
7 |
import pkg_resources
|
8 |
from dash_bio import Clustergram
|
9 |
-
from proscope.data import get_genename_to_uniprot, get_lddt, get_seq
|
10 |
-
|
11 |
-
seq = get_seq()
|
12 |
-
genename_to_uniprot = get_genename_to_uniprot()
|
13 |
-
lddt = get_lddt()
|
14 |
import sys
|
15 |
import s3fs
|
16 |
from glob import glob
|
17 |
-
|
18 |
import numpy as np
|
|
|
19 |
from atac_rna_data_processing.config.load_config import load_config
|
20 |
from atac_rna_data_processing.io.celltype import GETCellType
|
21 |
from atac_rna_data_processing.io.nr_motif_v1 import NrMotifV1
|
22 |
from proscope.af2 import AFPairseg
|
|
|
23 |
from proscope.protein import Protein
|
24 |
from proscope.viewer import view_pdb_html
|
25 |
|
|
|
|
|
|
|
|
|
|
|
26 |
args = argparse.ArgumentParser()
|
27 |
args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
|
28 |
args.add_argument("-s", "--share", action="store_true", help="Share on network")
|
29 |
-
args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
|
30 |
args.add_argument("-u", "--s3_uri", type=str, default="None", help="Path to demo S3 bucket")
|
|
|
31 |
args = args.parse_args()
|
32 |
-
# set pseudo args
|
33 |
-
# args = args.parse_args(['-p', '7869', '-s', '-d', '/manitou/pmg/users/xf2217/demo_data'])
|
34 |
|
35 |
-
gene_pairs = glob(f"{args.s3_path}/structures/causal/*")
|
36 |
-
gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
|
37 |
GET_CONFIG = load_config(
|
38 |
"/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
|
39 |
)
|
@@ -41,46 +38,57 @@ GET_CONFIG.celltype.jacob = True
|
|
41 |
GET_CONFIG.celltype.num_cls = 2
|
42 |
GET_CONFIG.celltype.input = True
|
43 |
GET_CONFIG.celltype.embed = True
|
44 |
-
GET_CONFIG.celltype.data_dir = (
|
45 |
-
f"{args.s3_path}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
|
46 |
-
)
|
47 |
-
GET_CONFIG.celltype.interpret_dir = (
|
48 |
-
f"{args.s3_path}/Interpretation_all_hg38_allembed_v4_natac/"
|
49 |
-
)
|
50 |
-
GET_CONFIG.motif_dir = "/manitou/pmg/users/xf2217/interpret_natac/motif-clustering"
|
51 |
-
motif = NrMotifV1.load_from_pickle(
|
52 |
-
pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
|
53 |
-
# GET_CONFIG.motif_dir,
|
54 |
-
)
|
55 |
-
GET_CONFIG.s3_path = args.s3_path if args.s3_path else None
|
56 |
|
57 |
-
|
58 |
-
GET_CONFIG.
|
59 |
-
+ "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
|
60 |
-
)
|
61 |
-
cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
|
62 |
-
cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
|
63 |
-
if GET_CONFIG.s3_path:
|
64 |
s3 = s3fs.S3FileSystem()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
available_celltypes = sorted(
|
66 |
[
|
67 |
cell_type_id_to_name[f.split("/")[-1]]
|
68 |
for f in s3.glob(GET_CONFIG.celltype.interpret_dir + "*")
|
69 |
]
|
70 |
)
|
|
|
71 |
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
available_celltypes = sorted(
|
73 |
[
|
74 |
cell_type_id_to_name[f.split("/")[-1]]
|
75 |
for f in glob(GET_CONFIG.celltype.interpret_dir + "*")
|
76 |
]
|
77 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
plt.rcParams["figure.dpi"] = 100
|
79 |
|
80 |
|
81 |
def visualize_AF2(tf_pair, a):
|
82 |
-
strcture_dir = f"{args.
|
83 |
-
fasta_dir = f"{args.
|
84 |
if not os.path.exists(strcture_dir):
|
85 |
gr.ErrorText("No such gene pair")
|
86 |
|
|
|
6 |
import pandas as pd
|
7 |
import pkg_resources
|
8 |
from dash_bio import Clustergram
|
|
|
|
|
|
|
|
|
|
|
9 |
import sys
|
10 |
import s3fs
|
11 |
from glob import glob
|
|
|
12 |
import numpy as np
|
13 |
+
|
14 |
from atac_rna_data_processing.config.load_config import load_config
|
15 |
from atac_rna_data_processing.io.celltype import GETCellType
|
16 |
from atac_rna_data_processing.io.nr_motif_v1 import NrMotifV1
|
17 |
from proscope.af2 import AFPairseg
|
18 |
+
from proscope.data import get_genename_to_uniprot, get_lddt, get_seq
|
19 |
from proscope.protein import Protein
|
20 |
from proscope.viewer import view_pdb_html
|
21 |
|
22 |
+
|
23 |
+
seq = get_seq()
|
24 |
+
genename_to_uniprot = get_genename_to_uniprot()
|
25 |
+
lddt = get_lddt()
|
26 |
+
|
27 |
args = argparse.ArgumentParser()
|
28 |
args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
|
29 |
args.add_argument("-s", "--share", action="store_true", help="Share on network")
|
|
|
30 |
args.add_argument("-u", "--s3_uri", type=str, default="None", help="Path to demo S3 bucket")
|
31 |
+
args.add_argument("-d", "--data", type=str, default="None", help="Data directory")
|
32 |
args = args.parse_args()
|
|
|
|
|
33 |
|
|
|
|
|
34 |
GET_CONFIG = load_config(
|
35 |
"/app/modules/atac_rna_data_processing/atac_rna_data_processing/config/GET"
|
36 |
)
|
|
|
38 |
GET_CONFIG.celltype.num_cls = 2
|
39 |
GET_CONFIG.celltype.input = True
|
40 |
GET_CONFIG.celltype.embed = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
+
if args.s3_uri: # Use S3 path if exists
|
43 |
+
GET_CONFIG.s3_uri = args.s3_uri
|
|
|
|
|
|
|
|
|
|
|
44 |
s3 = s3fs.S3FileSystem()
|
45 |
+
GET_CONFIG.celltype.data_dir = (
|
46 |
+
f"{args.s3_uri}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
|
47 |
+
)
|
48 |
+
GET_CONFIG.celltype.interpret_dir = (
|
49 |
+
f"{args.s3_uri}/Interpretation_all_hg38_allembed_v4_natac/"
|
50 |
+
)
|
51 |
+
GET_CONFIG.motif_dir = f"{args.s3_uri}/interpret_natac/motif-clustering"
|
52 |
available_celltypes = sorted(
|
53 |
[
|
54 |
cell_type_id_to_name[f.split("/")[-1]]
|
55 |
for f in s3.glob(GET_CONFIG.celltype.interpret_dir + "*")
|
56 |
]
|
57 |
)
|
58 |
+
gene_pairs = s3.glob(f"{args.s3_uri}/structures/causal/*")
|
59 |
else:
|
60 |
+
GET_CONFIG.celltype.data_dir = (
|
61 |
+
f"{args.data}/pretrain_human_bingren_shendure_apr2023/fetal_adult/"
|
62 |
+
)
|
63 |
+
GET_CONFIG.celltype.interpret_dir = (
|
64 |
+
f"{args.data}/Interpretation_all_hg38_allembed_v4_natac/"
|
65 |
+
)
|
66 |
+
GET_CONFIG.motif_dir = f"{args.data}/interpret_natac/motif-clustering"
|
67 |
available_celltypes = sorted(
|
68 |
[
|
69 |
cell_type_id_to_name[f.split("/")[-1]]
|
70 |
for f in glob(GET_CONFIG.celltype.interpret_dir + "*")
|
71 |
]
|
72 |
)
|
73 |
+
gene_pairs = glob(f"{args.data}/structures/causal/*")
|
74 |
+
|
75 |
+
gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
|
76 |
+
motif = NrMotifV1.load_from_pickle(
|
77 |
+
pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
|
78 |
+
GET_CONFIG.motif_dir,
|
79 |
+
)
|
80 |
+
cell_type_annot = pd.read_csv(
|
81 |
+
GET_CONFIG.celltype.data_dir.split("fetal_adult")[0]
|
82 |
+
+ "data/cell_type_pretrain_human_bingren_shendure_apr2023.txt"
|
83 |
+
)
|
84 |
+
cell_type_id_to_name = dict(zip(cell_type_annot["id"], cell_type_annot["celltype"]))
|
85 |
+
cell_type_name_to_id = dict(zip(cell_type_annot["celltype"], cell_type_annot["id"]))
|
86 |
plt.rcParams["figure.dpi"] = 100
|
87 |
|
88 |
|
89 |
def visualize_AF2(tf_pair, a):
|
90 |
+
strcture_dir = f"{args.s3_uri}/structures/causal/{tf_pair}"
|
91 |
+
fasta_dir = f"{args.s3_uri}/sequences/causal/{tf_pair}"
|
92 |
if not os.path.exists(strcture_dir):
|
93 |
gr.ErrorText("No such gene pair")
|
94 |
|