supercat666 committed on
Commit
7274bd3
1 Parent(s): 1d62c05

fixed visual

Browse files
Files changed (3) hide show
  1. app.py +83 -57
  2. cas12.py +2 -0
  3. cas9on.py +4 -1
app.py CHANGED
@@ -136,55 +136,70 @@ if selected_model == 'Cas9':
136
  # Now create a Plotly plot with the sorted_predictions
137
  fig = go.Figure()
138
 
 
 
 
139
  # Iterate over the sorted predictions to create the plot
140
  for i, prediction in enumerate(sorted_predictions, start=1):
141
  # Extract data for plotting
142
  chrom, start, end, strand, target, gRNA, pred_score = prediction # Adjusted to include the target sequence
 
 
143
  fig.add_trace(go.Scatter(
144
  x=[start, end],
145
- y=[i, i], # Y-values are just the rank of the prediction
146
  mode='lines+markers+text',
147
  name=f"gRNA: {gRNA}",
148
- text=[f"Rank: {i}", ""], # Text at the start position only
149
  hoverinfo='text',
150
- hovertext=[
151
- f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
152
- ""
153
- ],
154
  ))
155
 
156
  # Update the layout of the plot
157
  fig.update_layout(
158
- title='Top 10 gRNA Sequences by Prediction Score',
159
  xaxis_title='Genomic Position',
160
- yaxis_title='Rank',
161
- yaxis=dict(showticklabels=False)
162
- # Hide the y-axis labels since the rank is indicated in the hovertext
 
 
 
163
  )
164
 
165
  # Display the plot
166
  st.plotly_chart(fig)
167
 
168
  if gene_sequence: # Ensure gene_sequence is not empty
 
169
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
170
- cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
171
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
172
- cas9on.create_bed_file_from_df(df, bed_file_path)
173
- st.write('Top on-target predictions:')
174
- st.dataframe(df)
175
 
176
- # Add a download button for the GenBank file
177
- with open(genbank_file_path, "rb") as file:
178
- st.download_button(
179
- label="Download GenBank File",
180
- data=file,
181
- file_name=genbank_file_path,
182
- mime="text/x-genbank"
183
- )
184
- # Download button for the BED file
185
- with open(bed_file_path, "rb") as file:
186
- st.download_button(label="Download BED File", data=file,
187
- file_name=bed_file_path, mime="text/plain")
 
 
 
 
 
 
 
 
 
 
 
 
188
 
189
  # # Visualize the GenBank file using pyGenomeViz
190
  # gv = GenomeViz(
@@ -336,60 +351,71 @@ elif selected_model == 'Cas12':
336
  # Now create a Plotly plot with the sorted_predictions
337
  fig = go.Figure()
338
 
 
 
 
339
  # Iterate over the sorted predictions to create the plot
340
  for i, prediction in enumerate(sorted_predictions, start=1):
341
  # Extract data for plotting
342
- chrom, start, end, strand, Target, gRNA, pred_score = prediction
343
- # Strand is not used in this plot, but you could use it to determine marker symbol, for example
 
344
  fig.add_trace(go.Scatter(
345
  x=[start, end],
346
- y=[i, i], # Y-values are just the rank of the prediction
347
  mode='lines+markers+text',
348
  name=f"gRNA: {gRNA}",
349
- text=[f"Rank: {i}", ""], # Text at the start position only
350
  hoverinfo='text',
351
- hovertext=[
352
- f"Rank: {i}<br>Chromosome: {chrom}<br>Target: {Target}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == 1 else '-'}<br>Prediction Score: {pred_score:.4f}",
353
- ""
354
- ],
355
  ))
 
356
  # Update the layout of the plot
357
  fig.update_layout(
358
- title='Top 10 gRNA Sequences by Prediction Score',
359
  xaxis_title='Genomic Position',
360
- yaxis_title='Rank',
361
- yaxis=dict(showticklabels=False)
362
- # We hide the y-axis labels since the rank is indicated in the hovertext
 
 
 
363
  )
 
364
  # Display the plot
365
  st.plotly_chart(fig)
366
 
367
  # Ensure gene_sequence is not empty before generating files
368
  if gene_sequence:
 
369
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
370
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
 
371
 
372
- # Generate GenBank file
373
- cas12.generate_genbank_file_from_data(df, gene_sequence, gene_symbol, genbank_file_path)
374
-
375
- # Generate BED file
376
- cas12.generate_bed_file_from_data(df, bed_file_path)
377
 
378
- st.write('Top on-target predictions:')
379
- st.dataframe(df)
380
 
381
- # Download buttons
382
- with open(genbank_file_path, "rb") as file:
383
- st.download_button(
384
- label="Download GenBank File",
385
- data=file,
386
- file_name=genbank_file_path,
387
- mime="text/x-genbank"
388
- )
389
-
390
- with open(bed_file_path, "rb") as file:
391
- st.download_button(label="Download BED File", data=file,
392
- file_name=bed_file_path, mime="text/plain")
 
 
 
 
393
 
394
  # Clean up old files after download buttons are created
395
  clean_up_old_files(gene_symbol)
 
136
  # Now create a Plotly plot with the sorted_predictions
137
  fig = go.Figure()
138
 
139
+ # Set y values based on strand information
140
+ strand_y_values = {'1': 1, '-1': -1}
141
+
142
  # Iterate over the sorted predictions to create the plot
143
  for i, prediction in enumerate(sorted_predictions, start=1):
144
  # Extract data for plotting
145
  chrom, start, end, strand, target, gRNA, pred_score = prediction # Adjusted to include the target sequence
146
+ # Assign y value based on strand
147
+ y_value = strand_y_values[str(strand)] # Convert strand to string for dict lookup
148
  fig.add_trace(go.Scatter(
149
  x=[start, end],
150
+ y=[y_value, y_value], # Assign both points (start and end) the same y value based on strand
151
  mode='lines+markers+text',
152
  name=f"gRNA: {gRNA}",
153
+ text=f"Rank: {i}", # Place text at the first point
154
  hoverinfo='text',
155
+ hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if str(strand) == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
 
 
 
156
  ))
157
 
158
  # Update the layout of the plot
159
  fig.update_layout(
160
+ title='CRISPR Targets by Strand',
161
  xaxis_title='Genomic Position',
162
+ yaxis=dict(
163
+ title='Strand',
164
+ tickmode='array',
165
+ tickvals=[1, -1],
166
+ ticktext=['+ Strand', '- Strand']
167
+ )
168
  )
169
 
170
  # Display the plot
171
  st.plotly_chart(fig)
172
 
173
  if gene_sequence: # Ensure gene_sequence is not empty
174
+ # Define file paths
175
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
 
176
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
177
+ csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
 
 
178
 
179
+ # Generate files
180
+ cas9on.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
181
+ cas9on.create_bed_file_from_df(df, bed_file_path)
182
+ cas9on.create_csv_from_df(df, csv_file_path)
183
+
184
+ # File download selection
185
+ file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
186
+
187
+ if file_type == 'GenBank':
188
+ with open(genbank_file_path, "rb") as file:
189
+ st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
190
+ mime="text/x-genbank")
191
+ st.markdown(
192
+ "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
193
+ elif file_type == 'BED':
194
+ with open(bed_file_path, "rb") as file:
195
+ st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
196
+ mime="text/plain")
197
+ st.markdown(
198
+ "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
199
+ elif file_type == 'CSV':
200
+ with open(csv_file_path, "rb") as file:
201
+ st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
202
+ mime="text/csv")
203
 
204
  # # Visualize the GenBank file using pyGenomeViz
205
  # gv = GenomeViz(
 
351
  # Now create a Plotly plot with the sorted_predictions
352
  fig = go.Figure()
353
 
354
+ # Set y values based on strand information
355
+ strand_y_values = {'1': 1, '-1': -1}
356
+
357
  # Iterate over the sorted predictions to create the plot
358
  for i, prediction in enumerate(sorted_predictions, start=1):
359
  # Extract data for plotting
360
+ chrom, start, end, strand, target, gRNA, pred_score = prediction # Adjusted to include the target sequence
361
+ # Assign y value based on strand
362
+ y_value = strand_y_values[str(strand)] # Convert strand to string for dict lookup
363
  fig.add_trace(go.Scatter(
364
  x=[start, end],
365
+ y=[y_value, y_value], # Assign both points (start and end) the same y value based on strand
366
  mode='lines+markers+text',
367
  name=f"gRNA: {gRNA}",
368
+ text=f"Rank: {i}", # Place text at the first point
369
  hoverinfo='text',
370
+ hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if str(strand) == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
 
 
 
371
  ))
372
+
373
  # Update the layout of the plot
374
  fig.update_layout(
375
+ title='CRISPR Targets by Strand',
376
  xaxis_title='Genomic Position',
377
+ yaxis=dict(
378
+ title='Strand',
379
+ tickmode='array',
380
+ tickvals=[1, -1],
381
+ ticktext=['+ Strand', '- Strand']
382
+ )
383
  )
384
+
385
  # Display the plot
386
  st.plotly_chart(fig)
387
 
388
  # Ensure gene_sequence is not empty before generating files
389
  if gene_sequence:
390
+ # Define file paths
391
  genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
392
  bed_file_path = f"{gene_symbol}_crispr_targets.bed"
393
+ csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
394
 
395
+ # Generate files
396
+ cas12.generate_genbank_file_from_data(df, gene_sequence, gene_symbol, genbank_file_path)
397
+ cas12.generate_bed_file_from_data(df, bed_file_path)
398
+ cas12.create_csv_from_df(df, csv_file_path)
 
399
 
400
+ # File download selection
401
+ file_type = st.selectbox('Select file type to download:', ('GenBank', 'BED', 'CSV'))
402
 
403
+ if file_type == 'GenBank':
404
+ with open(genbank_file_path, "rb") as file:
405
+ st.download_button(label="Download GenBank File", data=file, file_name=genbank_file_path,
406
+ mime="text/x-genbank")
407
+ st.markdown(
408
+ "GenBank files can be visualized using [PyGenomeViz](https://pygenomeviz.streamlit.app/)")
409
+ elif file_type == 'BED':
410
+ with open(bed_file_path, "rb") as file:
411
+ st.download_button(label="Download BED File", data=file, file_name=bed_file_path,
412
+ mime="text/plain")
413
+ st.markdown(
414
+ "BED files can be used with the [UCSC Genome Browser](https://genome.ucsc.edu/cgi-bin/hgCustom)")
415
+ elif file_type == 'CSV':
416
+ with open(csv_file_path, "rb") as file:
417
+ st.download_button(label="Download CSV File", data=file, file_name=csv_file_path,
418
+ mime="text/csv")
419
 
420
  # Clean up old files after download buttons are created
421
  clean_up_old_files(gene_symbol)
cas12.py CHANGED
@@ -175,6 +175,8 @@ def generate_genbank_file_from_data(formatted_data, gene_sequence, gene_symbol,
175
  record.annotations["molecule_type"] = "DNA"
176
  SeqIO.write(record, output_path, "genbank")
177
 
 
 
178
 
179
  def generate_bed_file_from_data(formatted_data, output_path):
180
  with open(output_path, 'w') as bed_file:
 
175
  record.annotations["molecule_type"] = "DNA"
176
  SeqIO.write(record, output_path, "genbank")
177
 
178
+ def create_csv_from_df(df, output_path):
179
+ df.to_csv(output_path, index=False)
180
 
181
  def generate_bed_file_from_data(formatted_data, output_path):
182
  with open(output_path, 'w') as bed_file:
cas9on.py CHANGED
@@ -170,4 +170,7 @@ def create_bed_file_from_df(df, output_path):
170
  strand = '+' if row["Strand"] == '+' else '-'
171
  gRNA = row["gRNA"]
172
  score = str(row["Prediction"]) # Ensure score is converted to string if not already
173
- bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
 
 
 
 
170
  strand = '+' if row["Strand"] == '+' else '-'
171
  gRNA = row["gRNA"]
172
  score = str(row["Prediction"]) # Ensure score is converted to string if not already
173
+ bed_file.write(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\n")
174
+
175
+ def create_csv_from_df(df, output_path):
176
+ df.to_csv(output_path, index=False)