supercat666 committed on
Commit
3d0dd11
1 Parent(s): e7225c8
Files changed (1) hide show
  1. app.py +73 -65
app.py CHANGED
@@ -107,10 +107,10 @@ if selected_model == 'Cas9':
107
  def clean_up_old_files(gene_symbol):
108
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
109
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
110
- if os.path.exists(genbank_file_path):
111
- os.remove(genbank_file_path)
112
- if os.path.exists(bed_file_path):
113
- os.remove(bed_file_path)
114
 
115
  if st.session_state['current_gene_symbol'] and gene_symbol != st.session_state['current_gene_symbol']:
116
  clean_up_old_files(st.session_state['current_gene_symbol'])
@@ -136,44 +136,47 @@ if selected_model == 'Cas9':
136
  # Now create a Plotly plot with the sorted_predictions
137
  fig = go.Figure()
138
 
139
- # Variables to help spread gRNAs on the y-axis based on their strand
140
- y_positive_strand = 10
141
- y_negative_strand = -10
142
- strand_offset = 1 # This will space out each subsequent guide on the same strand
143
 
144
  # Iterate over the sorted predictions to create the plot
145
  for i, prediction in enumerate(sorted_predictions, start=1):
146
  # Extract data for plotting
147
  chrom, start, end, strand, target, gRNA, pred_score = prediction
148
- y_value = y_positive_strand if strand == 1 else y_negative_strand
 
 
 
 
 
 
 
 
 
149
  fig.add_trace(go.Scatter(
150
  x=[start, end],
151
- y=[y_value, y_value], # Assign all points the same y value based on strand
152
  mode='lines+markers+text',
 
153
  name=f"gRNA: {gRNA}",
154
- text=[f"Rank: {i}", ""], # Text at the start position only
155
  hoverinfo='text',
156
- hovertext=[
157
- f"Rank: {i}<br>Target: {target}<br>gRNA: {gRNA}<br>Cutsite: {start}<br>On Target Score: {pred_score:.4f}",
158
- ""
159
- ],
160
  ))
161
- # Update the y-value for the next guide on the same strand
162
- if strand == 1:
163
- y_positive_strand += strand_offset
164
- else:
165
- y_negative_strand -= strand_offset
166
 
167
  # Update the layout of the plot
168
  fig.update_layout(
169
  title='Top 10 gRNA Sequences by Prediction Score',
170
  xaxis_title='Genomic Position',
171
- yaxis_title='Strand',
172
  yaxis=dict(
 
173
  showgrid=True, # Show horizontal gridlines for clarity
174
- zeroline=True, # Show a line at y=0
175
  zerolinecolor='Black',
176
  zerolinewidth=2,
 
 
177
  ),
178
  showlegend=False # Hide the legend if it's not necessary
179
  )
@@ -181,7 +184,7 @@ if selected_model == 'Cas9':
181
  # Display the plot
182
  st.plotly_chart(fig)
183
 
184
- if gene_sequence: # Ensure gene_sequence is not empty
185
  # Define file paths
186
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
187
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
@@ -192,26 +195,27 @@ if selected_model == 'Cas9':
192
  cas9on.create_bed_file_from_df(df, bed_file_path)
193
  cas9on.create_csv_from_df(df, csv_file_path)
194
 
195
- # File download selection
196
- file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
197
-
198
- if file_type == 'GenBank':
199
  with open(genbank_file_path, "rb") as file:
200
  st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
201
  mime="text/x-genbank")
202
- st.markdown(
203
- "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
204
- elif file_type == 'BED':
205
  with open(bed_file_path, "rb") as file:
206
  st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
207
  mime="text/plain")
208
- st.markdown(
209
- "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
210
- elif file_type == 'CSV':
211
  with open(csv_file_path, "rb") as file:
212
  st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
213
  mime="text/csv")
214
 
 
 
 
 
 
 
215
  # # Visualize the GenBank file using pyGenomeViz
216
  # gv = GenomeViz(
217
  # feature_track_ratio=0.3,
@@ -333,10 +337,10 @@ elif selected_model == 'Cas12':
333
  def clean_up_old_files(gene_symbol):
334
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
335
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
336
- if os.path.exists(genbank_file_path):
337
- os.remove(genbank_file_path)
338
- if os.path.exists(bed_file_path):
339
- os.remove(bed_file_path)
340
 
341
  # Clean up files if a new gene symbol is entered
342
  if st.session_state['current_gene_symbol'] and gene_symbol != st.session_state['current_gene_symbol']:
@@ -362,44 +366,47 @@ elif selected_model == 'Cas12':
362
  # Now create a Plotly plot with the sorted_predictions
363
  fig = go.Figure()
364
 
365
- # Variables to help spread gRNAs on the y-axis based on their strand
366
- y_positive_strand = 10
367
- y_negative_strand = -10
368
- strand_offset = 1 # This will space out each subsequent guide on the same strand
369
 
370
  # Iterate over the sorted predictions to create the plot
371
  for i, prediction in enumerate(sorted_predictions, start=1):
372
  # Extract data for plotting
373
  chrom, start, end, strand, target, gRNA, pred_score = prediction
374
- y_value = y_positive_strand if strand == 1 else y_negative_strand
 
 
 
 
 
 
 
 
 
375
  fig.add_trace(go.Scatter(
376
  x=[start, end],
377
- y=[y_value, y_value], # Assign all points the same y value based on strand
378
  mode='lines+markers+text',
 
379
  name=f"gRNA: {gRNA}",
380
- text=[f"Rank: {i}", ""], # Text at the start position only
381
  hoverinfo='text',
382
- hovertext=[
383
- f"Rank: {i}<br>Target: {target}<br>gRNA: {gRNA}<br>Cutsite: {start}<br>On Target Score: {pred_score:.4f}",
384
- ""
385
- ],
386
  ))
387
- # Update the y-value for the next guide on the same strand
388
- if strand == 1:
389
- y_positive_strand += strand_offset
390
- else:
391
- y_negative_strand -= strand_offset
392
 
393
  # Update the layout of the plot
394
  fig.update_layout(
395
  title='Top 10 gRNA Sequences by Prediction Score',
396
  xaxis_title='Genomic Position',
397
- yaxis_title='Strand',
398
  yaxis=dict(
 
399
  showgrid=True, # Show horizontal gridlines for clarity
400
- zeroline=True, # Show a line at y=0
401
  zerolinecolor='Black',
402
  zerolinewidth=2,
 
 
403
  ),
404
  showlegend=False # Hide the legend if it's not necessary
405
  )
@@ -419,26 +426,27 @@ elif selected_model == 'Cas12':
419
  cas9on.create_bed_file_from_df(df, bed_file_path)
420
  cas9on.create_csv_from_df(df, csv_file_path)
421
 
422
- # File download selection
423
- file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
424
-
425
- if file_type == 'GenBank':
426
  with open(genbank_file_path, "rb") as file:
427
  st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
428
  mime="text/x-genbank")
429
- st.markdown(
430
- "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
431
- elif file_type == 'BED':
432
  with open(bed_file_path, "rb") as file:
433
  st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
434
  mime="text/plain")
435
- st.markdown(
436
- "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
437
- elif file_type == 'CSV':
438
  with open(csv_file_path, "rb") as file:
439
  st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
440
  mime="text/csv")
441
 
 
 
 
 
 
 
442
 
443
 
444
  elif selected_model == 'Cas13d':
 
107
  def clean_up_old_files(gene_symbol):
108
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
109
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
110
+ csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
111
+ for path in [genbank_file_path, bed_file_path, csv_file_path]:
112
+ if os.path.exists(path):
113
+ os.remove(path)
114
 
115
  if st.session_state['current_gene_symbol'] and gene_symbol != st.session_state['current_gene_symbol']:
116
  clean_up_old_files(st.session_state['current_gene_symbol'])
 
# Build a Plotly figure with one horizontal segment per ranked gRNA.
# NOTE(review): indentation was lost in this view; re-indent to match the
# enclosing `if selected_model == 'Cas9':` branch when applying.
fig = go.Figure()

# Fixed baseline y positions for the two strands. These are NOT mutated in
# the loop so they can be reused as the axis tick positions below.
positive_strand_y = 1
negative_strand_y = -1
strand_y_step = 0.1  # vertical spacing between guides on the same strand
positive_count = 0   # guides placed so far on the + strand
negative_count = 0   # guides placed so far on the - strand

# Iterate over the sorted predictions to create the plot
for i, prediction in enumerate(sorted_predictions, start=1):
    # Extract data for plotting
    chrom, start, end, strand, target, gRNA, pred_score = prediction

    # Stack each guide outward from its strand's baseline; the marker
    # symbol points in the strand direction.
    if strand == 1:
        y_value = positive_strand_y + positive_count * strand_y_step
        arrow_symbol = 'triangle-right'
        positive_count += 1
    else:
        y_value = negative_strand_y - negative_count * strand_y_step
        arrow_symbol = 'triangle-left'
        negative_count += 1

    fig.add_trace(go.Scatter(
        x=[start, end],
        y=[y_value, y_value],  # flat segment at this guide's stacked height
        mode='lines+markers+text',
        marker=dict(symbol=arrow_symbol, size=10),
        name=f"gRNA: {gRNA}",
        text=[f"Rank: {i}", ""],  # Text at the first point
        hoverinfo='text',
        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == 1 else '-'}<br>Prediction Score: {pred_score:.4f}",
    ))

# Update the layout of the plot
fig.update_layout(
    title='Top 10 gRNA Sequences by Prediction Score',
    xaxis_title='Genomic Position',
    yaxis=dict(
        title='Strand',
        showgrid=True,  # Show horizontal gridlines for clarity
        zeroline=True,  # Show a line at y=0 to represent the axis
        zerolinecolor='Black',
        zerolinewidth=2,
        # Label the strand baselines. Previously these used the loop
        # accumulators, which had already stepped past the outermost guide.
        tickvals=[positive_strand_y, negative_strand_y],
        ticktext=['+ Strand', '- Strand']
    ),
    showlegend=False  # Hide the legend if it's not necessary
)
 
184
  # Display the plot
185
  st.plotly_chart(fig)
186
 
187
+ if gene_sequence:
188
  # Define file paths
189
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
190
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
 
195
  cas9on.create_bed_file_from_df(df, bed_file_path)
196
  cas9on.create_csv_from_df(df, csv_file_path)
197
 
198
+ # Layout for download buttons
199
+ col1, col2, col3 = st.columns(3)
200
+ with col1:
 
201
  with open(genbank_file_path, "rb") as file:
202
  st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
203
  mime="text/x-genbank")
204
+ with col2:
 
 
205
  with open(bed_file_path, "rb") as file:
206
  st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
207
  mime="text/plain")
208
+ with col3:
 
 
209
  with open(csv_file_path, "rb") as file:
210
  st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
211
  mime="text/csv")
212
 
213
+ # Links for user guidance on using the downloaded files
214
+ st.markdown(
215
+ "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/). "
216
+ "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)."
217
+ )
218
+
219
  # # Visualize the GenBank file using pyGenomeViz
220
  # gv = GenomeViz(
221
  # feature_track_ratio=0.3,
 
337
  def clean_up_old_files(gene_symbol):
338
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
339
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
340
+ csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
341
+ for path in [genbank_file_path, bed_file_path, csv_file_path]:
342
+ if os.path.exists(path):
343
+ os.remove(path)
344
 
345
  # Clean up files if a new gene symbol is entered
346
  if st.session_state['current_gene_symbol'] and gene_symbol != st.session_state['current_gene_symbol']:
 
# Build a Plotly figure with one horizontal segment per ranked gRNA.
# NOTE(review): indentation was lost in this view; re-indent to match the
# enclosing `elif selected_model == 'Cas12':` branch when applying.
fig = go.Figure()

# Fixed baseline y positions for the two strands. These are NOT mutated in
# the loop so they can be reused as the axis tick positions below.
positive_strand_y = 1
negative_strand_y = -1
strand_y_step = 0.1  # vertical spacing between guides on the same strand
positive_count = 0   # guides placed so far on the + strand
negative_count = 0   # guides placed so far on the - strand

# Iterate over the sorted predictions to create the plot
for i, prediction in enumerate(sorted_predictions, start=1):
    # Extract data for plotting
    chrom, start, end, strand, target, gRNA, pred_score = prediction

    # Stack each guide outward from its strand's baseline; the marker
    # symbol points in the strand direction.
    if strand == 1:
        y_value = positive_strand_y + positive_count * strand_y_step
        arrow_symbol = 'triangle-right'
        positive_count += 1
    else:
        y_value = negative_strand_y - negative_count * strand_y_step
        arrow_symbol = 'triangle-left'
        negative_count += 1

    fig.add_trace(go.Scatter(
        x=[start, end],
        y=[y_value, y_value],  # flat segment at this guide's stacked height
        mode='lines+markers+text',
        marker=dict(symbol=arrow_symbol, size=10),
        name=f"gRNA: {gRNA}",
        text=[f"Rank: {i}", ""],  # Text at the first point
        hoverinfo='text',
        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == 1 else '-'}<br>Prediction Score: {pred_score:.4f}",
    ))

# Update the layout of the plot
fig.update_layout(
    title='Top 10 gRNA Sequences by Prediction Score',
    xaxis_title='Genomic Position',
    yaxis=dict(
        title='Strand',
        showgrid=True,  # Show horizontal gridlines for clarity
        zeroline=True,  # Show a line at y=0 to represent the axis
        zerolinecolor='Black',
        zerolinewidth=2,
        # Label the strand baselines. Previously these used the loop
        # accumulators, which had already stepped past the outermost guide.
        tickvals=[positive_strand_y, negative_strand_y],
        ticktext=['+ Strand', '- Strand']
    ),
    showlegend=False  # Hide the legend if it's not necessary
)
 
426
  cas9on.create_bed_file_from_df(df, bed_file_path)
427
  cas9on.create_csv_from_df(df, csv_file_path)
428
 
429
+ # Layout for download buttons
430
+ col1, col2, col3 = st.columns(3)
431
+ with col1:
 
432
  with open(genbank_file_path, "rb") as file:
433
  st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
434
  mime="text/x-genbank")
435
+ with col2:
 
 
436
  with open(bed_file_path, "rb") as file:
437
  st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
438
  mime="text/plain")
439
+ with col3:
 
 
440
  with open(csv_file_path, "rb") as file:
441
  st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
442
  mime="text/csv")
443
 
444
+ # Links for user guidance on using the downloaded files
445
+ st.markdown(
446
+ "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/). "
447
+ "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)."
448
+ )
449
+
450
 
451
 
452
  elif selected_model == 'Cas13d':