supercat666 committed on
Commit
a5afc1a
1 Parent(s): adf804d

fixed cas9off

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. cas9off.py +9 -29
app.py CHANGED
@@ -94,8 +94,8 @@ if selected_model == 'Cas9':
94
  if target_selection == 'on-target':
95
 
96
  pass
97
- elif target_selection == 'off-target':
98
 
 
99
  ENTRY_METHODS = dict(
100
  manual='Manual entry of target sequence',
101
  txt="txt file upload"
 
94
  if target_selection == 'on-target':
95
 
96
  pass
 
97
 
98
+ elif target_selection == 'off-target':
99
  ENTRY_METHODS = dict(
100
  manual='Manual entry of target sequence',
101
  txt="txt file upload"
cas9off.py CHANGED
@@ -4,28 +4,12 @@ import pandas as pd
4
  import os
5
  import argparse
6
 
7
- # column names
8
- ID_COL = 'Transcript ID'
9
- SEQ_COL = 'Transcript Sequence'
10
-
11
  # configure GPUs
12
  for gpu in tf.config.list_physical_devices('GPU'):
13
  tf.config.experimental.set_memory_growth(gpu, enable=True)
14
  if len(tf.config.list_physical_devices('GPU')) > 0:
15
  tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
16
 
17
- # application configuration
18
- BATCH_SIZE_COMPUTE = 500
19
- BATCH_SIZE_SCAN = 20
20
- BATCH_SIZE_TRANSCRIPTS = 50
21
- NUM_TOP_GUIDES = 10
22
- NUM_MISMATCHES = 3
23
- RUN_MODES = dict(
24
- all='All on-target guides per transcript',
25
- top_guides='Top {:d} guides per transcript'.format(NUM_TOP_GUIDES),
26
- titration='Top {:d} guides per transcript & their titration candidates'.format(NUM_TOP_GUIDES)
27
- )
28
-
29
  class Encoder:
30
  def __init__(self, on_seq, off_seq, with_category = False, label = None, with_reg_val = False, value = None):
31
  tlen = 24
@@ -81,9 +65,6 @@ class Encoder:
81
  on_off_dim7_codes.append(np.concatenate((diff_code, dir_code)))
82
  self.on_off_code = np.array(on_off_dim7_codes)
83
 
84
-
85
-
86
-
87
  def encode_on_off_seq_pairs(input_file):
88
  inputs = pd.read_csv(input_file, delimiter=",", header=None, names=['on_seq', 'off_seq'])
89
  input_codes = []
@@ -107,37 +88,36 @@ def CRISPR_net_predict(X_test):
107
  y_pred = loaded_model.predict(X_test).flatten()
108
  return y_pred
109
 
 
110
  def process_input_and_predict(input_data, input_type='manual'):
111
  if input_type == 'manual':
112
- # Process manual input string into DataFrame
113
  sequences = [seq.split(',') for seq in input_data.split('\n')]
114
  inputs = pd.DataFrame(sequences, columns=['on_seq', 'off_seq'])
115
  elif input_type == 'file':
116
- # Read sequences from a file into DataFrame
117
  inputs = pd.read_csv(input_data, delimiter=",", header=None, names=['on_seq', 'off_seq'])
118
 
119
- # Encode the sequences
120
  input_codes = []
121
  for idx, row in inputs.iterrows():
122
  on_seq = row['on_seq']
123
  off_seq = row['off_seq']
124
-
125
- # Validate on_seq and off_seq
126
  if not on_seq or not off_seq:
127
- # Skip the current row if either on_seq or off_seq is missing or empty
128
  continue
129
 
130
  en = Encoder(on_seq=on_seq, off_seq=off_seq)
131
  input_codes.append(en.on_off_code)
 
132
 
133
- # Convert to numpy array and reshape for the model
134
  input_codes = np.array(input_codes)
135
  input_codes = input_codes.reshape((len(input_codes), 1, 24, 7))
136
 
137
- # Predict with CRISPR-Net model
138
- inputs['CRISPR_Net_score'] = CRISPR_net_predict(input_codes)
 
 
 
139
 
140
- return inputs
141
 
142
  if __name__ == '__main__':
143
  parser = argparse.ArgumentParser(description="CRISPR-Net v1.0 (Aug 10 2019)")
 
4
  import os
5
  import argparse
6
 
 
 
 
 
7
  # configure GPUs
8
  for gpu in tf.config.list_physical_devices('GPU'):
9
  tf.config.experimental.set_memory_growth(gpu, enable=True)
10
  if len(tf.config.list_physical_devices('GPU')) > 0:
11
  tf.config.experimental.set_visible_devices(tf.config.list_physical_devices('GPU')[0], 'GPU')
12
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  class Encoder:
14
  def __init__(self, on_seq, off_seq, with_category = False, label = None, with_reg_val = False, value = None):
15
  tlen = 24
 
65
  on_off_dim7_codes.append(np.concatenate((diff_code, dir_code)))
66
  self.on_off_code = np.array(on_off_dim7_codes)
67
 
 
 
 
68
  def encode_on_off_seq_pairs(input_file):
69
  inputs = pd.read_csv(input_file, delimiter=",", header=None, names=['on_seq', 'off_seq'])
70
  input_codes = []
 
88
  y_pred = loaded_model.predict(X_test).flatten()
89
  return y_pred
90
 
91
+
92
  def process_input_and_predict(input_data, input_type='manual'):
93
  if input_type == 'manual':
 
94
  sequences = [seq.split(',') for seq in input_data.split('\n')]
95
  inputs = pd.DataFrame(sequences, columns=['on_seq', 'off_seq'])
96
  elif input_type == 'file':
 
97
  inputs = pd.read_csv(input_data, delimiter=",", header=None, names=['on_seq', 'off_seq'])
98
 
99
+ valid_inputs = []
100
  input_codes = []
101
  for idx, row in inputs.iterrows():
102
  on_seq = row['on_seq']
103
  off_seq = row['off_seq']
 
 
104
  if not on_seq or not off_seq:
 
105
  continue
106
 
107
  en = Encoder(on_seq=on_seq, off_seq=off_seq)
108
  input_codes.append(en.on_off_code)
109
+ valid_inputs.append((on_seq, off_seq))
110
 
 
111
  input_codes = np.array(input_codes)
112
  input_codes = input_codes.reshape((len(input_codes), 1, 24, 7))
113
 
114
+ y_pred = CRISPR_net_predict(input_codes)
115
+
116
+ # Create a new DataFrame from valid inputs and predictions
117
+ result_df = pd.DataFrame(valid_inputs, columns=['on_seq', 'off_seq'])
118
+ result_df['CRISPR_Net_score'] = y_pred
119
 
120
+ return result_df
121
 
122
  if __name__ == '__main__':
123
  parser = argparse.ArgumentParser(description="CRISPR-Net v1.0 (Aug 10 2019)")