Spaces:

NiniCat
/

CRISPRTool

Sleeping

App Files Files Community

supercat666 committed on Mar 4

Commit

242350b

•

1 Parent(s): 2b3514d

fix

Browse files

Files changed (2) hide show

app.py +47 -22
cas9on.py +30 -2

app.py CHANGED Viewed

@@ -144,13 +144,20 @@ if selected_model == 'Cas9':
         # Prediction button
         predict_button = st.button('Predict on-target')
         # Process predictions
         if predict_button and gene_symbol:
             with st.spinner('Predicting... Please wait'):
-                predictions, gene_sequence = cas9on.process_gene(gene_symbol, cas9on_path)
                 sorted_predictions = sorted(predictions, key=lambda x: x[-1], reverse=True)[:10]
                 st.session_state['on_target_results'] = sorted_predictions
                 st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
             # Notify the user once the process is completed successfully.
             st.success('Prediction completed!')
@@ -162,44 +169,64 @@ if selected_model == 'Cas9':
                 df = pd.DataFrame(st.session_state['on_target_results'],
                                   columns=["Gene ID", "Start Pos", "End Pos", "Strand", "Target", "gRNA", "Prediction"])
                 st.dataframe(df)
-                # Now create a Plotly plot with the sorted_predictions
                 fig = go.Figure()
                 # Initialize the y position for the positive and negative strands
                 positive_strand_y = 0.1
                 negative_strand_y = -0.1
-                # Use an offset to spread gRNA sequences vertically
-                offset = 0.05
                 # Iterate over the sorted predictions to create the plot
-                for i, prediction in enumerate(sorted_predictions, start=1):
-                    # Extract data for plotting and convert start and end to integers
                     chrom, start, end, strand, target, gRNA, pred_score = prediction
                     start, end = int(start), int(end)
                     midpoint = (start + end) / 2
-                    # Set the y-value and arrow symbol based on the strand
-                    if strand == '1':
                         y_value = positive_strand_y
                         arrow_symbol = 'triangle-right'
-                        # Increment the y-value for the next positive strand gRNA
                         positive_strand_y += offset
-                    else:
                         y_value = negative_strand_y
                         arrow_symbol = 'triangle-left'
-                        # Decrement the y-value for the next negative strand gRNA
                         negative_strand_y -= offset
                     fig.add_trace(go.Scatter(
                         x=[midpoint],
-                        y=[y_value],  # Use the y_value set above for the strand
                         mode='markers+text',
                         marker=dict(symbol=arrow_symbol, size=10),
                         name=f"gRNA: {gRNA}",
-                        text=f"Rank: {i}",  # Place text at the marker
                         hoverinfo='text',
-                        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == 1 else '-'}<br>Prediction Score: {pred_score:.4f}",
                     ))
                 # Update the layout of the plot
@@ -208,14 +235,12 @@ if selected_model == 'Cas9':
                     xaxis_title='Genomic Position',
                     yaxis=dict(
                         title='Strand',
-                        showgrid=True,  # Show horizontal gridlines for clarity
-                        zeroline=True,  # Show a line at y=0 to represent the axis
-                        zerolinecolor='Black',
-                        zerolinewidth=2,
-                        tickvals=[positive_strand_y, negative_strand_y],
-                        ticktext=['+ Strand', '- Strand']
                     ),
-                    showlegend=False  # Hide the legend if it's not necessary
                 )
                 # Display the plot

         # Prediction button
         predict_button = st.button('Predict on-target')
+        if 'exons' not in st.session_state:
+            st.session_state['exons'] = []
+        if 'cds' not in st.session_state:
+            st.session_state['cds'] = []
         # Process predictions
         if predict_button and gene_symbol:
             with st.spinner('Predicting... Please wait'):
+                predictions, gene_sequence, exons, cds = cas9on.process_gene(gene_symbol, cas9on_path)
                 sorted_predictions = sorted(predictions, key=lambda x: x[-1], reverse=True)[:10]
                 st.session_state['on_target_results'] = sorted_predictions
                 st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
+                st.session_state['exons'] = exons  # Store exon data
+                st.session_state['cds'] = cds  # Store CDS data
             # Notify the user once the process is completed successfully.
             st.success('Prediction completed!')
                 df = pd.DataFrame(st.session_state['on_target_results'],
                                   columns=["Gene ID", "Start Pos", "End Pos", "Strand", "Target", "gRNA", "Prediction"])
                 st.dataframe(df)
+                # Now create a Plotly plot with the sorted_predictions
+                # Initialize Plotly figure
                 fig = go.Figure()
+                # Plot Exons as horizontal lines or rectangles
+                exon_y = 0.2  # Adjust this as needed
+                for exon in st.session_state['exons']:
+                    exon_start, exon_end = int(exon['start']), int(exon['end'])
+                    fig.add_trace(go.Scatter(
+                        x=[exon_start, exon_end],
+                        y=[exon_y, exon_y],
+                        mode='lines',
+                        line=dict(color='purple', width=10),  # Adjust styling as needed
+                        name='Exon'
+                    ))
+                # Plot CDS as horizontal lines or rectangles
+                cds_y = 0.3  # Adjust this as needed
+                for cds in st.session_state['cds']:
+                    cds_start, cds_end = int(cds['start']), int(cds['end'])
+                    fig.add_trace(go.Scatter(
+                        x=[cds_start, cds_end],
+                        y=[cds_y, cds_y],
+                        mode='lines',
+                        line=dict(color='blue', width=10),  # Adjust styling as needed
+                        name='CDS'
+                    ))
+                # Plot gRNAs using triangles to indicate direction
                 # Initialize the y position for the positive and negative strands
                 positive_strand_y = 0.1
                 negative_strand_y = -0.1
+                offset = 0.05  # Use an offset to spread gRNA sequences vertically
                 # Iterate over the sorted predictions to create the plot
+                for i, prediction in enumerate(st.session_state['on_target_results'], start=1):
                     chrom, start, end, strand, target, gRNA, pred_score = prediction
                     start, end = int(start), int(end)
                     midpoint = (start + end) / 2
+                    if strand == '1':  # Positive strand
                         y_value = positive_strand_y
                         arrow_symbol = 'triangle-right'
                         positive_strand_y += offset
+                    else:  # Negative strand
                         y_value = negative_strand_y
                         arrow_symbol = 'triangle-left'
                         negative_strand_y -= offset
                     fig.add_trace(go.Scatter(
                         x=[midpoint],
+                        y=[y_value],
                         mode='markers+text',
                         marker=dict(symbol=arrow_symbol, size=10),
                         name=f"gRNA: {gRNA}",
+                        text=f"Rank: {i}",
                         hoverinfo='text',
+                        hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' else '-'}<br>Prediction Score: {pred_score:.4f}",
                     ))
                 # Update the layout of the plot
                     xaxis_title='Genomic Position',
                     yaxis=dict(
                         title='Strand',
+                        showgrid=True,
+                        zeroline=False,
+                        tickvals=[positive_strand_y, negative_strand_y, exon_y, cds_y],
+                        ticktext=['+ Strand gRNAs', '- Strand gRNAs', 'Exons', 'CDS']
                     ),
+                    showlegend=True
                 )
                 # Display the plot

cas9on.py CHANGED Viewed

@@ -104,6 +104,7 @@ def find_crispr_targets(sequence, chr, start, strand, pam="NGG", target_length=2
     return targets
 def process_gene(gene_symbol, model_path):
     transcripts = fetch_ensembl_transcripts(gene_symbol)
     all_data = []
@@ -118,14 +119,41 @@ def process_gene(gene_symbol, model_path):
             # Fetch the sequence here and concatenate if multiple transcripts
             gene_sequence += fetch_ensembl_sequence(transcript_id) or ''
             if gene_sequence:
                 gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
                 if gRNA_sites:
                     formatted_data = format_prediction_output(gRNA_sites, model_path)
                     all_data.extend(formatted_data)
-    # Return both the data and the fetched sequence
-    return all_data, gene_sequence
 def create_genbank_features(formatted_data):
     features = []

     return targets
 def process_gene(gene_symbol, model_path):
     transcripts = fetch_ensembl_transcripts(gene_symbol)
     all_data = []
             # Fetch the sequence here and concatenate if multiple transcripts
             gene_sequence += fetch_ensembl_sequence(transcript_id) or ''
+            # Fetch exon and CDS information
+            exons = fetch_ensembl_exons(transcript_id)
+            cds_list = fetch_ensembl_cds(transcript_id)
+            # You might want to do something specific with exons and CDS information here
+            # For example, store them, print them, or include them in your analysis
             if gene_sequence:
                 gRNA_sites = find_crispr_targets(gene_sequence, chr, start, strand)
                 if gRNA_sites:
                     formatted_data = format_prediction_output(gRNA_sites, model_path)
                     all_data.extend(formatted_data)
+    # Return the data, fetched sequence, and possibly exon/CDS data
+    return all_data, gene_sequence, exons, cds_list
+def fetch_ensembl_exons(transcript_id):
+    """Fetch exon information for a given transcript from Ensembl.
+
+    Queries the Ensembl REST ``overlap/id`` endpoint with ``feature=exon``.
+
+    Args:
+        transcript_id: Ensembl transcript identifier interpolated into the URL.
+
+    Returns:
+        The decoded JSON payload (a list of exons for the transcript) on
+        HTTP 200, or an empty list when the request fails, so callers can
+        iterate over the result unconditionally.
+    """
+    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=exon;content-type=application/json"
+    try:
+        # Bound the wait: requests applies no timeout by default and would
+        # otherwise block the caller indefinitely on a stalled connection.
+        response = requests.get(url, timeout=30)
+    except requests.RequestException as exc:
+        print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {exc}")
+        return []
+    if response.status_code == 200:
+        return response.json()  # Returns a list of exons for the transcript
+    print(f"Error fetching exon data from Ensembl for transcript {transcript_id}: {response.text}")
+    # An empty list (not None) keeps the "no data" case iterable downstream.
+    return []
+def fetch_ensembl_cds(transcript_id):
+    """Fetch coding sequence (CDS) information for a given transcript from Ensembl.
+
+    Queries the Ensembl REST ``overlap/id`` endpoint with ``feature=cds``.
+
+    Args:
+        transcript_id: Ensembl transcript identifier interpolated into the URL.
+
+    Returns:
+        The decoded JSON payload (a list of CDS regions for the transcript)
+        on HTTP 200, or an empty list when the request fails, so callers can
+        iterate over the result unconditionally.
+    """
+    url = f"https://rest.ensembl.org/overlap/id/{transcript_id}?feature=cds;content-type=application/json"
+    try:
+        # Bound the wait: requests applies no timeout by default and would
+        # otherwise block the caller indefinitely on a stalled connection.
+        response = requests.get(url, timeout=30)
+    except requests.RequestException as exc:
+        print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {exc}")
+        return []
+    if response.status_code == 200:
+        return response.json()  # Returns a list of CDS regions for the transcript
+    print(f"Error fetching CDS data from Ensembl for transcript {transcript_id}: {response.text}")
+    # An empty list (not None) keeps the "no data" case iterable downstream.
+    return []
 def create_genbank_features(formatted_data):
     features = []