VaultChem committed on
Commit
b9fbcbc
1 Parent(s): 80007ba

updated app format

Browse files
Files changed (2) hide show
  1. app.py +6 -6
  2. chemdata.py +2 -55
app.py CHANGED
@@ -25,7 +25,7 @@ from rdkit.Chem.Draw import rdMolDraw2D
25
  import pandas as pd
26
  from st_keyup import st_keyup
27
 
28
- st.set_page_config(layout="centered", page_title="VaultChem")
29
 
30
 
31
  def local_css(file_name):
@@ -60,7 +60,7 @@ formatted_text = (
60
  "<h1 style='text-align: center;'>"
61
  "<span style='color: red;'>Pharmacokinetics</span>"
62
  "<span style='color: black;'> of </span>"
63
- "<span style='color: blue;'>🤫confidential🤫</span>"
64
  "<span style='color: black;'> molecules</span>"
65
  "</h1>"
66
  )
@@ -82,9 +82,9 @@ The server on which the prediction is computed will never see the molecule in cl
82
  Why is this **magic**? Because this is equivalent to computing the prediction on the molecule in clear text, but without sharing the molecule with the server.
83
  Even if organization "B" - or in fact any other party - would try to steal the data, they would only see the encrypted molecular data.
84
  **Only the party that has the private key (organization "A") can decrypt the prediction**. This is possible using a method called "Fully Homomorphic Encryption" (FHE).
85
- This special encryption scheme allows to perform computations on encrypted data.
86
 
87
- We use the open-source library <a href="https://docs.zama.ai/concrete-ml" target="_blank">Concrete ML</a> to develop safe and robust encryption technology.
88
 
89
  The code used for the FHE prediction is available in the open-source library
90
  \n
@@ -103,7 +103,7 @@ st.divider()
103
 
104
  st.markdown(
105
  "<p style='text-align: center; color: grey;'>"
106
- + img_to_html("scheme2.png", width="80%")
107
  + "</p>",
108
  unsafe_allow_html=True,
109
  )
@@ -652,7 +652,7 @@ if __name__ == "__main__":
652
  st.markdown(
653
  """
654
  <div style="width: 100%; text-align: center; padding: 10px;">
655
- The app was built with <a href="https://docs.zama.ai/concrete-ml" target="_blank">Concrete ML</a>,
656
  an open-source library by <a href="https://www.zama.ai/" target="_blank">Zama</a>.
657
  </div>
658
  """,
 
25
  import pandas as pd
26
  from st_keyup import st_keyup
27
 
28
+ st.set_page_config(layout="wide", page_title="VaultChem")
29
 
30
 
31
  def local_css(file_name):
 
60
  "<h1 style='text-align: center;'>"
61
  "<span style='color: red;'>Pharmacokinetics</span>"
62
  "<span style='color: black;'> of </span>"
63
+ "<span style='color: blue;'>🤫confidential</span>"
64
  "<span style='color: black;'> molecules</span>"
65
  "</h1>"
66
  )
 
82
  Why is this **magic**? Because this is equivalent to computing the prediction on the molecule in clear text, but without sharing the molecule with the server.
83
  Even if organization "B" - or in fact any other party - would try to steal the data, they would only see the encrypted molecular data.
84
  **Only the party that has the private key (organization "A") can decrypt the prediction**. This is possible using a method called "Fully Homomorphic Encryption" (FHE).
85
+ This special encryption scheme allows to perform computations on encrypted data, to learn more about FHE, click [here](https://fhe.org/resources/).
86
 
87
+ We use the open-source library <a href="https://github.com/zama-ai/concrete-ml" target="_blank">Concrete-ML</a> to develop safe and robust encryption technology.
88
 
89
  The code used for the FHE prediction is available in the open-source library
90
  \n
 
103
 
104
  st.markdown(
105
  "<p style='text-align: center; color: grey;'>"
106
+ + img_to_html("scheme2.png", width="65%")
107
  + "</p>",
108
  unsafe_allow_html=True,
109
  )
 
652
  st.markdown(
653
  """
654
  <div style="width: 100%; text-align: center; padding: 10px;">
655
+ The app was built with <a href="https://github.com/zama-ai/concrete-ml" target="_blank">Concrete-ML</a>,
656
  an open-source library by <a href="https://www.zama.ai/" target="_blank">Zama</a>.
657
  </div>
658
  """,
chemdata.py CHANGED
@@ -153,60 +153,6 @@ def compute_descriptors_from_smiles_list(SMILES):
153
  return np.array(X)
154
 
155
 
156
- class ProcessToxChemData:
157
- def __init__(self, bits=256):
158
- self.bits = int(bits)
159
- if not os.path.exists("data"):
160
- os.makedirs("data")
161
- self.save_file = "data/" + "save_file_Tox" + str(self.bits) + ".pkl"
162
-
163
- if os.path.exists(self.save_file):
164
- with open(self.save_file, "rb") as file:
165
- self.adjusted_valid_entries_per_task = pickle.load(file)
166
- else:
167
- url = "https://github.com/deepchem/deepchem/blob/master/datasets/tox21.csv.gz?raw=true"
168
- response = requests.get(url)
169
- content = gzip.decompress(response.content)
170
- self.df = pd.read_csv(BytesIO(content))
171
- self.process()
172
- self.save_adjusted_data()
173
-
174
- def process(self):
175
- self.adjusted_valid_entries_per_task = {}
176
-
177
- # Iterating through each task column and extracting valid entries
178
- for task in self.df.columns[
179
- :-2
180
- ]: # Excluding mol_id and smiles from the iteration
181
- valid_entries = self.df.dropna(subset=[task])[["mol_id", "smiles", task]]
182
-
183
- valid_entries["fps"] = valid_entries["smiles"].apply(
184
- lambda x: generate_fingerprint(x, radius=2, bits=self.bits)
185
- )
186
- valid_entries = valid_entries.dropna(subset=["fps"])
187
- valid_entries["descriptors"] = valid_entries["smiles"].apply(
188
- lambda x: compute_descriptors_from_smiles_list([x])[0]
189
- )
190
- valid_entries = valid_entries.dropna(subset=["descriptors"])
191
- # Shuffle the rows
192
- valid_entries = valid_entries.sample(frac=1, random_state=42).reset_index(
193
- drop=True
194
- )
195
- self.adjusted_valid_entries_per_task[task] = valid_entries
196
- self.adjusted_valid_entries_per_task[
197
- task
198
- ] = self.adjusted_valid_entries_per_task[task].rename(columns={task: "y"})
199
-
200
- def save_adjusted_data(self):
201
- with open(self.save_file, "wb") as file:
202
- pickle.dump(self.adjusted_valid_entries_per_task, file)
203
-
204
- def get_X_y(self, task):
205
- X = np.float_(np.stack(self.adjusted_valid_entries_per_task[task].fps.values))
206
- y = self.adjusted_valid_entries_per_task[task].y.values.astype(int)
207
- return X, y
208
-
209
-
210
  class ProcessADMEChemData:
211
  def __init__(self, bits=512, radius=2):
212
  self.bits = int(bits)
@@ -291,7 +237,8 @@ def load_ADME_data(task, bits=256, radius=2):
291
  """
292
  data = ProcessADMEChemData(bits=bits, radius=radius)
293
  X, y = data.get_X_y(task)
294
- return train_test_split(X, y, test_size=0.2, random_state=42)
 
295
 
296
 
297
  class ProcessGenericChemData:
 
153
  return np.array(X)
154
 
155
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  class ProcessADMEChemData:
157
  def __init__(self, bits=512, radius=2):
158
  self.bits = int(bits)
 
237
  """
238
  data = ProcessADMEChemData(bits=bits, radius=radius)
239
  X, y = data.get_X_y(task)
240
+ SMILES = data.adjusted_valid_entries_per_task[task]["smiles"].values
241
+ return train_test_split(SMILES,X, y, test_size=0.2, random_state=42)
242
 
243
 
244
  class ProcessGenericChemData: