supercat666 committed on
Commit
3ccc65d
1 Parent(s): e55148c
Files changed (1) hide show
  1. cas9on.py +15 -10
cas9on.py CHANGED
@@ -209,26 +209,31 @@ def create_bigwig(df, bigwig_path):
209
  if not all(column in df.columns for column in required_columns):
210
  raise ValueError(f"DataFrame must contain {required_columns} columns.")
211
 
212
- # Ensure all necessary columns are of correct type and sorted
213
- df = df.astype({"Chr": str, "Start Pos": int, "End Pos": int, "Prediction": float})
 
 
214
  df = df.sort_values(by=['Chr', 'Start Pos'])
215
 
216
- # Prepare the BigWig header
217
  chr_sizes = df.groupby('Chr')['End Pos'].max().to_dict()
218
  header = [(chr, size) for chr, size in chr_sizes.items()]
219
 
220
- # Create and write to the BigWig file
221
  bw = pyBigWig.open(bigwig_path, "w")
222
  bw.addHeader(header)
223
 
224
- # Add entries for each chromosome separately
225
- for chr, group in df.groupby('Chr'):
226
- starts = group['Start Pos'].values.tolist()
227
- ends = group['End Pos'].values.tolist()
228
- values = group['Prediction'].values.tolist()
229
- bw.addEntries(chr, starts, ends=ends, values=values)
 
230
 
231
  bw.close()
232
 
233
 
234
 
 
 
 
209
  if not all(column in df.columns for column in required_columns):
210
  raise ValueError(f"DataFrame must contain {required_columns} columns.")
211
 
212
+ # Convert columns to appropriate types and sort
213
+ df['Start Pos'] = df['Start Pos'].astype(int)
214
+ df['End Pos'] = df['End Pos'].astype(int)
215
+ df['Prediction'] = df['Prediction'].astype(float)
216
  df = df.sort_values(by=['Chr', 'Start Pos'])
217
 
218
+ # Prepare the BigWig header with chromosome sizes
219
  chr_sizes = df.groupby('Chr')['End Pos'].max().to_dict()
220
  header = [(chr, size) for chr, size in chr_sizes.items()]
221
 
222
+ # Initialize the BigWig file
223
  bw = pyBigWig.open(bigwig_path, "w")
224
  bw.addHeader(header)
225
 
226
+ # Iterate over each chromosome and add entries in sorted order
227
+ for chr in sorted(df['Chr'].unique()):
228
+ chrom_df = df[df['Chr'] == chr]
229
+ starts = chrom_df['Start Pos'].tolist()
230
+ ends = chrom_df['End Pos'].tolist()
231
+ values = chrom_df['Prediction'].tolist()
232
+ bw.addEntries([chr] * len(starts), starts, ends=ends, values=values)
233
 
234
  bw.close()
235
 
236
 
237
 
238
+
239
+