fuxialexander committed on
Commit
12761b6
1 Parent(s): 787dca0

add regulatory demo

Browse files
Files changed (7) hide show
  1. .gitignore +6 -0
  2. .gitmodules +3 -3
  3. Dockerfile +5 -1
  4. README.md +3 -1
  5. app/main.py +128 -11
  6. modules/atac_rna_data_processing +1 -0
  7. modules/proscope +0 -1
.gitignore CHANGED
@@ -1 +1,7 @@
1
  data
 
 
 
 
 
 
 
1
  data
2
+ getdemo
3
+ libs
4
+ etc
5
+ app/assets
6
+
7
+ getdemo-1.0.0.sif
.gitmodules CHANGED
@@ -1,3 +1,3 @@
1
- [submodule "modules/proscope"]
2
- path = modules/proscope
3
- url = git@github.com:fuxialexander/proscope.git
 
1
+ [submodule "modules/atac_rna_data_processing"]
2
+ path = modules/atac_rna_data_processing
3
+ url = git@github.com:fuxialexander/atac_rna_data_processing.git
Dockerfile CHANGED
@@ -6,7 +6,8 @@ WORKDIR /app
6
 
7
 
8
  # Create a new environment using mamba with specified packages
9
- RUN micromamba install -n base -c conda-forge -c bioconda -y python=3.10 git pip biopython nglview tqdm matplotlib pandas xmlschema seaborn numpy py3Dmol
 
10
  ARG MAMBA_DOCKERFILE_ACTIVATE=1
11
  # Activate the environment and install additional packages via pip
12
  RUN pip3 install gradio
@@ -34,6 +35,9 @@ COPY --chown=$MAMBA_USER:$MAMBA_USER app /app/app
34
  RUN cd modules/proscope && \
35
  pip3 install .
36
 
 
 
 
37
  WORKDIR /app
38
 
39
  # Make port 80 available to the world outside this container
 
6
 
7
 
8
  # Create a new environment using mamba with specified packages
9
+ RUN micromamba install -n base -c conda-forge -c bioconda -y python=3.10 pip biopython nglview dash-bio tqdm matplotlib pygraphviz pandas openpyxl pyarrow python-box xmlschema seaborn numpy py3Dmol pyranges scipy pyyaml zarr numcodecs pybigwig networkx plotly pysam requests seqlogo MOODS urllib3 pyliftover gprofiler-official pyfaidx
10
+
11
  ARG MAMBA_DOCKERFILE_ACTIVATE=1
12
  # Activate the environment and install additional packages via pip
13
  RUN pip3 install gradio
 
35
  RUN cd modules/proscope && \
36
  pip3 install .
37
 
38
+ RUN cd modules/atac_rna_data_processing && \
39
+ pip3 install .
40
+
41
  WORKDIR /app
42
 
43
  # Make port 80 available to the world outside this container
README.md CHANGED
@@ -25,6 +25,8 @@ git clone --recursive git@github.com:fuxialexander/getdemo.git
25
  cd getdemo
26
  docker pull fuxialexander/getdemo:latest
27
  docker run -it -v "/path/to/data:/data" --rm -p 7681:7681 fuxialexander/getdemo
 
 
28
  ```
29
  The gradio interface will be available at http://127.0.0.1:7681, a sharable link will be printed in the terminal.
30
 
@@ -34,4 +36,4 @@ git clone --recursive git@github.com:fuxialexander/getdemo.git
34
  cd getdemo
35
  docker build -t getdemo .
36
  docker run -it -v "/path/to/data:/data" --rm -p 7681:7681 getdemo
37
- ```
 
25
  cd getdemo
26
  docker pull fuxialexander/getdemo:latest
27
  docker run -it -v "/path/to/data:/data" --rm -p 7681:7681 fuxialexander/getdemo
28
+ or
29
+ singularity run -w --bind /manitou/pmg/users/xf2217/getdemo:/app --bind /manitou/pmg/users/xf2217/demo_data:/data --bind /pmglocal/xf2217/tmp:/tmp --no-home --pwd /app getdemo
30
  ```
31
  The gradio interface will be available at http://127.0.0.1:7681, a sharable link will be printed in the terminal.
32
 
 
36
  cd getdemo
37
  docker build -t getdemo .
38
  docker run -it -v "/path/to/data:/data" --rm -p 7681:7681 getdemo
39
+ ```
app/main.py CHANGED
@@ -1,29 +1,62 @@
1
- import glob
 
2
  import os
3
 
4
- import argparse
5
  import gradio as gr
6
  import matplotlib.pyplot as plt
7
- from proscope.data import get_seq, get_genename_to_uniprot, get_lddt
 
 
 
 
8
  seq = get_seq()
9
  genename_to_uniprot = get_genename_to_uniprot()
10
  lddt = get_lddt()
 
 
 
 
 
 
 
11
  from proscope.af2 import AFPairseg
12
  from proscope.protein import Protein
13
  from proscope.viewer import view_pdb_html
14
 
15
-
16
  args = argparse.ArgumentParser()
17
  args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
18
  args.add_argument("-s", "--share", action="store_true", help="Share on network")
19
  args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
20
- args = args.parse_args()
21
- gene_pairs = glob.glob(f"{args.data}/structures/causal/*")
 
 
 
22
  gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
23
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # set plot DPI to 100
25
  plt.rcParams['figure.dpi'] = 100
26
 
 
 
27
  def visualize_AF2(tf_pair, a):
28
  strcture_dir = f"{args.data}/structures/causal/{tf_pair}"
29
  fasta_dir = f"{args.data}/sequences/causal/{tf_pair}"
@@ -46,9 +79,45 @@ def view_pdb(seg_pair, a):
46
  return view_pdb_html(pdb_path), a, pdb_path
47
 
48
 
 
49
  def update_dropdown(x, label):
50
  return gr.Dropdown.update(choices=x, label=label)
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # main
54
  if __name__ == '__main__':
@@ -56,10 +125,50 @@ if __name__ == '__main__':
56
 
57
  seg_pairs = gr.State([''])
58
  af = gr.State(None)
 
 
59
  with gr.Row() as row:
 
60
  with gr.Column():
61
- tf_pairs = gr.Dropdown(label='TF pair', choices=gene_pairs)
62
- tf_pairs_btn = gr.Button(value='Load & Plot')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  interact_plddt1 = gr.Plot(label='Interact pLDDT 1')
64
  interact_plddt2 = gr.Plot(label='Interact pLDDT 2')
65
  protein1_plddt = gr.Plot(label='Protein 1 pLDDT')
@@ -68,13 +177,21 @@ if __name__ == '__main__':
68
  heatmap = gr.Plot(label='Heatmap')
69
 
70
  with gr.Column():
71
- segpair = gr.Dropdown(label='Seg pair', choices=seg_pairs.value)
72
- segpair_btn = gr.Button(value='Get PDB')
 
73
  pdb_html = gr.HTML(label="PDB HTML")
74
  pdb_file = gr.File(label='Download PDB')
75
 
76
  tf_pairs_btn.click(visualize_AF2, inputs = [tf_pairs, af], outputs = [ interact_plddt1, interact_plddt2, protein1_plddt, protein2_plddt, heatmap, segpair, af])
77
  segpair_btn.click(view_pdb, inputs=[segpair, af], outputs=[pdb_html, af, pdb_file])
 
 
 
 
 
78
 
79
  demo.launch(share=args.share, server_port=args.port)
80
 
 
 
 
1
+ #%%
2
+ import argparse
3
  import os
4
 
 
5
  import gradio as gr
6
  import matplotlib.pyplot as plt
7
+ import pkg_resources
8
+ from proscope.data import get_genename_to_uniprot, get_lddt, get_seq
9
+ import pandas as pd
10
+ from dash_bio import Clustergram
11
+
12
  seq = get_seq()
13
  genename_to_uniprot = get_genename_to_uniprot()
14
  lddt = get_lddt()
15
+ import sys
16
+ from glob import glob
17
+
18
+ import numpy as np
19
+ from atac_rna_data_processing.config.load_config import load_config
20
+ from atac_rna_data_processing.io.celltype import GETCellType
21
+ from atac_rna_data_processing.io.nr_motif_v1 import NrMotifV1
22
  from proscope.af2 import AFPairseg
23
  from proscope.protein import Protein
24
  from proscope.viewer import view_pdb_html
25
 
26
+ #%%
27
  args = argparse.ArgumentParser()
28
  args.add_argument("-p", "--port", type=int, default=7860, help="Port number")
29
  args.add_argument("-s", "--share", action="store_true", help="Share on network")
30
  args.add_argument("-d", "--data", type=str, default="/data", help="Data directory")
31
# Parse the real command line so the -p/-s/-d options declared above take
# effect; a hard-coded argument list here silently overrode whatever the
# caller (e.g. the documented docker/singularity invocations) passed in.
# parse_known_args() leaves unrecognised argv entries alone, so running the
# "#%%" cells from an interactive kernel does not abort on extra argv items.
args, _ignored_argv = args.parse_known_args()
34
+ #%%
35
+ gene_pairs = glob(f"{args.data}/structures/causal/*")
36
  gene_pairs = [os.path.basename(pair) for pair in gene_pairs]
37
+ GET_CONFIG = load_config('/manitou/pmg/users/xf2217/atac_rna_data_processing/atac_rna_data_processing/config/GET')
38
+ GET_CONFIG.celltype.jacob=True
39
+ GET_CONFIG.celltype.num_cls=2
40
+ GET_CONFIG.celltype.input=True
41
+ GET_CONFIG.celltype.embed=True
42
+ GET_CONFIG.celltype.data_dir = '/manitou/pmg/users/xf2217/pretrain_human_bingren_shendure_apr2023/fetal_adult/'
43
+ GET_CONFIG.celltype.interpret_dir='/manitou/pmg/users/xf2217/Interpretation_all_hg38_allembed_v4_natac/'
44
+ GET_CONFIG.motif_dir = '/manitou/pmg/users/xf2217/interpret_natac/motif-clustering'
45
+ motif = NrMotifV1.load_from_pickle(
46
+ pkg_resources.resource_filename("atac_rna_data_processing", "data/NrMotifV1.pkl"),
47
+ GET_CONFIG.motif_dir
48
+ )
49
+ cell_type_annot = pd.read_csv(GET_CONFIG.celltype.data_dir.split('fetal_adult')[0] + 'data/cell_type_pretrain_human_bingren_shendure_apr2023.txt')
50
+ cell_type_id_to_name = dict(zip(cell_type_annot['id'], cell_type_annot['celltype']))
51
+ cell_type_name_to_id = dict(zip(cell_type_annot['celltype'], cell_type_annot['id']))
52
+ avaliable_celltypes = sorted([cell_type_id_to_name[f.split('/')[-1]] for f in glob(GET_CONFIG.celltype.interpret_dir+'*')])
53
+ #%%
54
+ # fill this in...
55
  # set plot DPI to 100
56
  plt.rcParams['figure.dpi'] = 100
57
 
58
+
59
+
60
  def visualize_AF2(tf_pair, a):
61
  strcture_dir = f"{args.data}/structures/causal/{tf_pair}"
62
  fasta_dir = f"{args.data}/sequences/causal/{tf_pair}"
 
79
  return view_pdb_html(pdb_path), a, pdb_path
80
 
81
 
82
+
83
  def update_dropdown(x, label):
84
  return gr.Dropdown.update(choices=x, label=label)
85
 
86
def load_and_plot_celltype(celltype_name, GET_CONFIG, cell):
    """Load a cell type and build its gene-expression plot and summary table.

    Args:
        celltype_name: Display name picked in the "Cell Type" dropdown. It is
            translated to an id through the module-level
            ``cell_type_name_to_id`` mapping.
        GET_CONFIG: Configuration object handed to ``GETCellType``.
        cell: Previously stored cell type. It is not read; a freshly
            constructed ``GETCellType`` always replaces it.

    Returns:
        A ``(figure, table, cell)`` tuple: the result of
        ``cell.plotly_gene_exp()``, the per-gene mean of the ``pred``,
        ``obs`` and ``accessibility`` columns of ``cell.gene_annot``, and the
        newly loaded cell type so it can be written back to the state.

    Raises:
        gr.Error: If ``celltype_name`` is missing or not a known cell type.
    """
    # Clicking the button without a selection would otherwise surface as a
    # bare KeyError; report it through the UI instead.
    if celltype_name not in cell_type_name_to_id:
        raise gr.Error(f"Unknown or missing cell type: {celltype_name!r}")

    celltype_id = cell_type_name_to_id[celltype_name]
    cell = GETCellType(celltype_id, GET_CONFIG)
    cell.celltype_name = celltype_name

    gene_exp_fig = cell.plotly_gene_exp()
    summary_columns = ['pred', 'obs', 'accessibility']
    gene_exp_table = (
        cell.gene_annot
        .groupby('gene_name')[summary_columns]
        .mean()
        .reset_index()
    )
    return gene_exp_fig, gene_exp_table, cell
93
+
94
def plot_gene_regions(cell, gene_name, plotly=True):
    """Plot the regions of a gene for the currently loaded cell type.

    Args:
        cell: Loaded cell-type object exposing ``plot_gene_regions``; ``None``
            when no cell type has been loaded yet.
        gene_name: Gene name forwarded to ``cell.plot_gene_regions``.
        plotly: Forwarded unchanged as the ``plotly`` keyword.

    Returns:
        A ``(figure, cell)`` tuple; ``cell`` is passed through so it can be
        written back to the state.

    Raises:
        gr.Error: If no cell type has been loaded.
    """
    # The state starts out as None, so the button can fire before any load.
    if cell is None:
        raise gr.Error("Load a cell type before plotting gene regions.")
    figure = cell.plot_gene_regions(gene_name, plotly=plotly)
    return figure, cell
96
+
97
def plot_gene_motifs(cell, gene_name, motif, overwrite=False):
    """Plot the motifs of a gene for the currently loaded cell type.

    Args:
        cell: Loaded cell-type object exposing ``plot_gene_motifs``; ``None``
            when no cell type has been loaded yet.
        gene_name: Gene name forwarded to ``cell.plot_gene_motifs``.
        motif: Motif collection forwarded to ``cell.plot_gene_motifs``.
        overwrite: Forwarded unchanged as the ``overwrite`` keyword.

    Returns:
        A ``(figure, cell)`` tuple, where ``figure`` is the first element of
        what ``cell.plot_gene_motifs`` returns and ``cell`` is passed through
        so it can be written back to the state.

    Raises:
        gr.Error: If no cell type has been loaded.
    """
    # The state starts out as None, so the button can fire before any load.
    if cell is None:
        raise gr.Error("Load a cell type before plotting gene motifs.")
    result = cell.plot_gene_motifs(gene_name, motif, overwrite=overwrite)
    # Only the first element of the result is displayed.
    return result[0], cell
99
+
100
def plot_motif_subnet(cell, motif_collection, m, type='neighbors', threshold=0.1):
    """Plot the subnetwork around one motif for the loaded cell type.

    Args:
        cell: Loaded cell-type object exposing ``plotly_motif_subnet``;
            ``None`` when no cell type has been loaded yet.
        motif_collection: Motif collection forwarded as the first argument.
        m: Selected motif, forwarded as the second argument.
        type: Forwarded as the ``type`` keyword. The name shadows the builtin
            but is kept because callers may pass it by keyword.
        threshold: Forwarded as the ``threshold`` keyword.

    Returns:
        A ``(figure, cell)`` tuple; ``cell`` is passed through so it can be
        written back to the state.

    Raises:
        gr.Error: If no cell type has been loaded.
    """
    # The state starts out as None, so the button can fire before any load.
    if cell is None:
        raise gr.Error("Load a cell type before plotting a motif subnetwork.")
    figure = cell.plotly_motif_subnet(
        motif_collection, m, type=type, threshold=threshold
    )
    return figure, cell
102
+
103
def plot_gene_exp(cell, plotly=True):
    """Plot gene expression for the currently loaded cell type.

    Args:
        cell: Loaded cell-type object exposing ``plotly_gene_exp``; ``None``
            when no cell type has been loaded yet.
        plotly: Forwarded unchanged as the ``plotly`` keyword.

    Returns:
        A ``(figure, cell)`` tuple; ``cell`` is passed through so it can be
        written back to the state.

    Raises:
        gr.Error: If no cell type has been loaded.
    """
    # The state starts out as None, so this can be called before any load.
    if cell is None:
        raise gr.Error("Load a cell type before plotting gene expression.")
    # NOTE(review): elsewhere in this file plotly_gene_exp() is called with no
    # arguments - confirm that it accepts a ``plotly`` keyword.
    figure = cell.plotly_gene_exp(plotly=plotly)
    return figure, cell
105
+
106
def plot_motif_corr(cell):
    """Draw a clustered heatmap of the cell type's motif correlation table.

    Args:
        cell: Loaded cell-type object whose ``gene_by_motif.corr`` supplies
            the data; ``None`` when no cell type has been loaded yet.

    Returns:
        A ``(figure, cell)`` tuple: the ``Clustergram`` figure and ``cell``
        passed through so it can be written back to the state.

    Raises:
        gr.Error: If no cell type has been loaded.
    """
    # The state starts out as None, so the button can fire before any load.
    if cell is None:
        raise gr.Error("Load a cell type before plotting motif correlations.")

    # Read the table once and reuse it for the data and both label lists.
    # NOTE(review): ``corr`` is read as an attribute, not called - presumably
    # a precomputed table with ``columns`` and ``index``; confirm.
    corr = cell.gene_by_motif.corr
    fig = Clustergram(
        data=corr,
        column_labels=list(corr.columns.values),
        row_labels=list(corr.index),
        hidden_labels=['row', 'col'],
        link_method='average',
        display_ratio=0.1,
        width=600,
        height=400,
        color_map='rdbu_r',
    )
    return fig, cell
118
+
119
+ #%%
120
+ # fill this in...
121
 
122
  # main
123
  if __name__ == '__main__':
 
125
 
126
  seg_pairs = gr.State([''])
127
  af = gr.State(None)
128
+ cell = gr.State(None)
129
+
130
  with gr.Row() as row:
131
+ # Left column: cell type selector, gene expression plot and table
132
  with gr.Column():
133
+ with gr.Row() as row:
134
+ celltype_name = gr.Dropdown(label='Cell Type', choices=avaliable_celltypes)
135
+ celltype_btn = gr.Button(value='Load & Plot Gene Expression')
136
+ gene_exp_plot = gr.Plot(label='Gene Expression Pred vs Obs')
137
+ gene_exp_table = gr.DataFrame(label='Gene Expression Table', max_rows=10)
138
+
139
+ # Right column: important regions and motifs for a gene
140
+ with gr.Column():
141
+ gene_name_for_region = gr.Textbox(label='Get important regions or motifs for gene:')
142
+ with gr.Row() as row:
143
+ region_plot_btn = gr.Button(value='Regions')
144
+ motif_plot_btn = gr.Button(value='Motifs')
145
+
146
+ region_plot = gr.Plot(label='Gene Regions')
147
+ motif_plot = gr.Plot(label='Gene Motifs')
148
+
149
+
150
+ with gr.Row() as row:
151
+ with gr.Column():
152
+ clustergram_btn = gr.Button(value='Plot Motif Correlation Heatmap')
153
+ clustergram_plot = gr.Plot(label='Motif Correlation')
154
+
155
+
156
+ # Right column: Motif subnet plot
157
+ with gr.Column():
158
+ with gr.Row() as row:
159
+ motif_for_subnet = gr.Dropdown(label='Motif Causal Subnetwork', choices=motif.cluster_names)
160
+ subnet_type = gr.Dropdown(label='Type', choices=['neighbors', 'parents', 'children'], default='neighbors')
161
+ # slider for threshold 0.01-0.25
162
+ subnet_threshold = gr.Slider(label='Threshold', minimum=0.01, maximum=0.25, step=0.01, value=0.1)
163
+ subnet_btn = gr.Button(value='Plot Motif Causal Subnetwork')
164
+ subnet_plot = gr.Plot(label='Motif Causal Subnetwork')
165
+
166
+
167
+ with gr.Row() as row:
168
+ with gr.Column():
169
+ with gr.Row() as row:
170
+ tf_pairs = gr.Dropdown(label='TF pair', choices=gene_pairs)
171
+ tf_pairs_btn = gr.Button(value='Load & Plot')
172
  interact_plddt1 = gr.Plot(label='Interact pLDDT 1')
173
  interact_plddt2 = gr.Plot(label='Interact pLDDT 2')
174
  protein1_plddt = gr.Plot(label='Protein 1 pLDDT')
 
177
  heatmap = gr.Plot(label='Heatmap')
178
 
179
  with gr.Column():
180
+ with gr.Row() as row:
181
+ segpair = gr.Dropdown(label='Seg pair', choices=seg_pairs.value)
182
+ segpair_btn = gr.Button(value='Get PDB')
183
  pdb_html = gr.HTML(label="PDB HTML")
184
  pdb_file = gr.File(label='Download PDB')
185
 
186
  tf_pairs_btn.click(visualize_AF2, inputs = [tf_pairs, af], outputs = [ interact_plddt1, interact_plddt2, protein1_plddt, protein2_plddt, heatmap, segpair, af])
187
  segpair_btn.click(view_pdb, inputs=[segpair, af], outputs=[pdb_html, af, pdb_file])
188
+ celltype_btn.click(load_and_plot_celltype, inputs=[celltype_name, gr.State(GET_CONFIG), cell], outputs=[gene_exp_plot, gene_exp_table, cell])
189
+ region_plot_btn.click(plot_gene_regions, inputs=[cell, gene_name_for_region], outputs=[region_plot, cell])
190
+ motif_plot_btn.click(plot_gene_motifs, inputs=[cell, gene_name_for_region, gr.State(motif)], outputs=[motif_plot, cell])
191
+ clustergram_btn.click(plot_motif_corr, inputs=[cell], outputs=[clustergram_plot, cell])
192
+ subnet_btn.click(plot_motif_subnet, inputs=[cell, gr.State(motif), motif_for_subnet, subnet_type, subnet_threshold], outputs=[subnet_plot, cell])
193
 
194
  demo.launch(share=args.share, server_port=args.port)
195
 
196
+
197
+ # %%
modules/atac_rna_data_processing ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit fc337002918de1e5f1f864e7ba94864a743fd16c
modules/proscope DELETED
@@ -1 +0,0 @@
1
- Subproject commit fdb98d02f0d2345a74f28e71211849992893fa5d