supercat666 committed on
Commit
e55148c
1 Parent(s): 7fa8fbf
Files changed (1) hide show
  1. cas9on.py +12 -11
cas9on.py CHANGED
@@ -205,29 +205,30 @@ def process_gene(gene_symbol, model_path):
205
 
206
  def create_bigwig(df, bigwig_path):
207
  # Ensure the dataframe has the required columns
208
- if not all(column in df.columns for column in ["Chr", "Start Pos", "End Pos", "Prediction"]):
209
- raise ValueError("DataFrame must contain 'Chr', 'Start Pos', 'End Pos', and 'Prediction' columns.")
 
210
 
211
- # Convert positions to integers and sort the DataFrame
212
- df['Start Pos'] = df['Start Pos'].astype(int)
213
- df['End Pos'] = df['End Pos'].astype(int)
214
  df = df.sort_values(by=['Chr', 'Start Pos'])
215
 
216
  # Prepare the BigWig header
217
  chr_sizes = df.groupby('Chr')['End Pos'].max().to_dict()
218
- header = [(str(chr), size) for chr, size in chr_sizes.items()]
219
 
220
  # Create and write to the BigWig file
221
  bw = pyBigWig.open(bigwig_path, "w")
222
  bw.addHeader(header)
223
 
224
- # Group by chromosome and add entries
225
  for chr, group in df.groupby('Chr'):
226
- starts = group['Start Pos'].tolist()
227
- ends = group['End Pos'].tolist()
228
- values = group['Prediction'].astype(float).tolist()
229
- bw.addEntries([str(chr)] * len(starts), starts, ends=ends, values=values)
230
 
231
  bw.close()
232
 
233
 
 
 
205
 
206
  def create_bigwig(df, bigwig_path):
207
  # Ensure the dataframe has the required columns
208
+ required_columns = ["Chr", "Start Pos", "End Pos", "Prediction"]
209
+ if not all(column in df.columns for column in required_columns):
210
+ raise ValueError(f"DataFrame must contain {required_columns} columns.")
211
 
212
+ # Ensure all necessary columns are of correct type and sorted
213
+ df = df.astype({"Chr": str, "Start Pos": int, "End Pos": int, "Prediction": float})
 
214
  df = df.sort_values(by=['Chr', 'Start Pos'])
215
 
216
  # Prepare the BigWig header
217
  chr_sizes = df.groupby('Chr')['End Pos'].max().to_dict()
218
+ header = [(chr, size) for chr, size in chr_sizes.items()]
219
 
220
  # Create and write to the BigWig file
221
  bw = pyBigWig.open(bigwig_path, "w")
222
  bw.addHeader(header)
223
 
224
+ # Add entries for each chromosome separately
225
  for chr, group in df.groupby('Chr'):
226
+ starts = group['Start Pos'].values.tolist()
227
+ ends = group['End Pos'].values.tolist()
228
+ values = group['Prediction'].values.tolist()
229
+ bw.addEntries(chr, starts, ends=ends, values=values)
230
 
231
  bw.close()
232
 
233
 
234
+