jonas-verhellen committed on
Commit
8da2b6e
1 Parent(s): b699a86

Output Reformat

Browse files
__pycache__/illuminate.cpython-310.pyc ADDED
Binary file (7.15 kB). View file
 
app.py CHANGED
@@ -7,8 +7,13 @@ import gradio as gr
7
  from illuminate import Illuminate
8
  import matplotlib.pyplot as plt
9
 
 
 
10
 
11
-
 
 
 
12
 
13
 
14
  def launch_illumination(target, representation, surrogate, acquisition, ranges, generations_max, function_calls_max, structural_filters):
@@ -62,20 +67,27 @@ def launch_illumination(target, representation, surrogate, acquisition, ranges,
62
  stats_file = pd.read_csv("statistics.csv")
63
  molecules_file = pd.read_csv("molecules.csv")
64
 
65
- files_in_directory = os.listdir('.')
 
 
 
 
 
66
 
67
- pattern = re.compile(r'archive_(\d+)\.csv')
68
- archive_files = [f for f in files_in_directory if pattern.match(f)]
69
- archive_numbers = [int(pattern.search(f).group(1)) for f in archive_files]
70
- archive_file = pd.read_csv(f'archive_{max(archive_numbers)}.csv')
71
 
72
- csv_files = [file for file in files_in_directory if file.endswith('.csv')]
 
 
73
 
74
- for csv_file in csv_files:
75
- if os.path.isfile(csv_file):
76
- os.remove(csv_file)
 
 
 
77
 
78
- return stats_file, molecules_file #, archive_file
79
 
80
  def validate_and_process(target, representation, surrogate, acquisition, exact_mol_wt_min, exact_mol_wt_max, mol_log_p_min, mol_log_p_max, tpsa_min, tpsa_max, mol_mr_min, mol_mr_max, generations_max, function_calls_max, structural_filters):
81
  # Ensure min is less than max for each range
@@ -91,9 +103,8 @@ def validate_and_process(target, representation, surrogate, acquisition, exact_m
91
  mol_mr_range
92
  ]
93
 
94
- stats_file, molecules_file = launch_illumination(target, representation, surrogate, acquisition, ranges, generations_max, function_calls_max, structural_filters)
95
- return stats_file, molecules_file
96
-
97
 
98
  def gradio_interface():
99
  with gr.Blocks() as demo:
@@ -128,16 +139,11 @@ def gradio_interface():
128
  mol_mr_min = gr.Slider(minimum=0, maximum=250, value=40, step=1, label="Minimum Molecular Refractivity")
129
  mol_mr_max = gr.Slider(minimum=0, maximum=250, value=130, step=1, label="Maximum Molecular Refractivity")
130
 
131
- def plot_csv(file_path):
132
- df = pd.read_csv(file_path)
133
- fig, ax = plt.subplots()
134
- df.plot(ax=ax)
135
- return fig
136
-
137
  submit_btn = gr.Button("Submit")
138
 
139
- output_df_1 = gr.Dataframe(label="Optimisation History")
140
- output_df_2 = gr.Dataframe(label="Output Molecules")
 
141
 
142
  submit_btn.click(
143
  validate_and_process,
@@ -158,7 +164,7 @@ def gradio_interface():
158
  function_calls_max,
159
  structural_filters,
160
  ],
161
- outputs=[output_df_1, output_df_2]
162
  )
163
  demo.launch()
164
 
 
7
  from illuminate import Illuminate
8
  import matplotlib.pyplot as plt
9
 
10
+ from rdkit import Chem
11
+ from rdkit.Chem import Draw
12
 
13
def MolsMatrixToGridImage(mols, legends, filename):
    """Draw molecules in a grid, save the picture to disk and return it.

    Args:
        mols: Molecules forwarded to ``Draw.MolsToGridImage``.
        legends: Captions forwarded as the ``legends`` argument.
        filename: Path the rendered image is saved to.

    Returns:
        The image object produced by ``Draw.MolsToGridImage``.
    """
    # Layout is fixed: five molecules per row, 400x400 pixels per cell.
    grid = Draw.MolsToGridImage(
        mols,
        molsPerRow=5,
        subImgSize=(400, 400),
        legends=legends,
    )
    grid.save(filename)
    return grid
17
 
18
 
19
  def launch_illumination(target, representation, surrogate, acquisition, ranges, generations_max, function_calls_max, structural_filters):
 
67
  stats_file = pd.read_csv("statistics.csv")
68
  molecules_file = pd.read_csv("molecules.csv")
69
 
70
+ # files_in_directory = os.listdir('.')
71
+
72
+ # pattern = re.compile(r'archive_(\d+)\.csv')
73
+ # archive_files = [f for f in files_in_directory if pattern.match(f)]
74
+ # archive_numbers = [int(pattern.search(f).group(1)) for f in archive_files]
75
+ # archive_file = pd.read_csv(f'archive_{max(archive_numbers)}.csv')
76
 
77
+ # csv_files = [file for file in files_in_directory if file.endswith('.csv')]
 
 
 
78
 
79
+ # for csv_file in csv_files:
80
+ # if os.path.isfile(csv_file):
81
+ # os.remove(csv_file)
82
 
83
+ top_molecules = molecules_file.nlargest(10, 'fitness')
84
+ top_smiles = top_molecules['smiles'].tolist()
85
+ top_fitness = top_molecules['fitness'].tolist()
86
+ top_mols = [Chem.MolFromSmiles(smile) for smile in top_smiles]
87
+ top_legends = [f'Similarity: {score:.5f}' for score in top_fitness]
88
+ image = MolsMatrixToGridImage(mols=top_mols, legends=top_legends, filename='top_molecules_grid.png')
89
 
90
+ return image, stats_file, molecules_file
91
 
92
  def validate_and_process(target, representation, surrogate, acquisition, exact_mol_wt_min, exact_mol_wt_max, mol_log_p_min, mol_log_p_max, tpsa_min, tpsa_max, mol_mr_min, mol_mr_max, generations_max, function_calls_max, structural_filters):
93
  # Ensure min is less than max for each range
 
103
  mol_mr_range
104
  ]
105
 
106
+ image, stats_file, molecules_file = launch_illumination(target, representation, surrogate, acquisition, ranges, generations_max, function_calls_max, structural_filters)
107
+ return image
 
108
 
109
  def gradio_interface():
110
  with gr.Blocks() as demo:
 
139
  mol_mr_min = gr.Slider(minimum=0, maximum=250, value=40, step=1, label="Minimum Molecular Refractivity")
140
  mol_mr_max = gr.Slider(minimum=0, maximum=250, value=130, step=1, label="Maximum Molecular Refractivity")
141
 
 
 
 
 
 
 
142
  submit_btn = gr.Button("Submit")
143
 
144
+ output_image = gr.Image(label="Top Molecules")
145
+ gr.DownloadButton(label=f"Download Optimisation History", value="./statistics.csv", visible=True)
146
+ gr.DownloadButton(label=f"Download Output Molecules", value="./molecules.csv", visible=True)
147
 
148
  submit_btn.click(
149
  validate_and_process,
 
164
  function_calls_max,
165
  structural_filters,
166
  ],
167
+ outputs=[output_image]
168
  )
169
  demo.launch()
170
 
illumination/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/illumination/__pycache__/__init__.cpython-310.pyc and b/illumination/__pycache__/__init__.cpython-310.pyc differ
 
illumination/__pycache__/base.cpython-310.pyc CHANGED
Binary files a/illumination/__pycache__/base.cpython-310.pyc and b/illumination/__pycache__/base.cpython-310.pyc differ
 
illumination/__pycache__/infrastructure.cpython-310.pyc CHANGED
Binary files a/illumination/__pycache__/infrastructure.cpython-310.pyc and b/illumination/__pycache__/infrastructure.cpython-310.pyc differ
 
illumination/__pycache__/mechanism.cpython-310.pyc CHANGED
Binary files a/illumination/__pycache__/mechanism.cpython-310.pyc and b/illumination/__pycache__/mechanism.cpython-310.pyc differ
 
illumination/__pycache__/operations.cpython-310.pyc CHANGED
Binary files a/illumination/__pycache__/operations.cpython-310.pyc and b/illumination/__pycache__/operations.cpython-310.pyc differ
 
illumination/app.py DELETED
@@ -1,163 +0,0 @@
1
- import os
2
- import re
3
- import logging
4
- import pandas as pd
5
- from omegaconf import OmegaConf
6
- import gradio as gr
7
- from illuminate import Illuminate
8
- import matplotlib.pyplot as plt
9
-
10
def launch_illumination(target, representation, surrogate, acquisition, ranges, generations_max, function_calls_max, structural_filters):
    """Run one Illuminate optimisation and return its CSV results as DataFrames.

    Builds the run configuration from the arguments, executes ``Illuminate``
    with it, loads ``statistics.csv`` and ``molecules.csv`` from the working
    directory, and then deletes the CSV files found there.

    Args:
        target: Value stored under ``fitness.target`` (the UI labels it as a
            SMILES string).
        representation: Value stored under ``fitness.representation``.
        surrogate: Value stored under ``surrogate.representation``.
        acquisition: Value stored under ``acquisition.type``.
        ranges: Value stored under ``descriptor.ranges``; the caller passes one
            ``[min, max]`` pair per entry of ``descriptor.properties``.
        generations_max: Value stored under ``controller.max_generations``.
        function_calls_max: Value stored under ``controller.max_fitness_calls``.
        structural_filters: Iterable of rule-set names copied into
            ``arbiter.rules``.

    Returns:
        A ``(stats_file, molecules_file)`` tuple of pandas DataFrames.

    Raises:
        FileNotFoundError: If ``statistics.csv`` or ``molecules.csv`` is not
            present after the run (raised by ``pd.read_csv``).
    """
    config = {
        'controller': {
            'max_generations': generations_max,
            'max_fitness_calls': function_calls_max
        },
        'archive': {
            'name': 'Troglitazone',
            'size': 150,
            'accuracy': 25000
        },
        'descriptor': {
            'properties': [
                'Descriptors.ExactMolWt',
                'Descriptors.MolLogP',
                'Descriptors.TPSA',
                'Crippen.MolMR'
            ],
            'ranges': ranges
        },
        'fitness': {
            'type': 'Fingerprint',
            'target': target,
            'representation': representation
        },
        'arbiter': {
            'rules': list(structural_filters)
        },
        'generator': {
            'batch_size': 40,
            'initial_size': 40,
            'mutation_data': 'data/smarts/mutation_collection.tsv',
            'initial_data': 'data/smiles/guacamol_intitial_rediscovery_troglitazone.smi'
        },
        'surrogate': {
            'type': "Fingerprint",
            'representation': surrogate,
        },
        'acquisition': {
            'type': acquisition,
            'beta': 0.3
        }
    }
    log = logging.getLogger(__name__)
    log.info(OmegaConf.to_yaml(config))
    current_instance = Illuminate(OmegaConf.create(config))
    current_instance()

    # Presumably both files are written by the Illuminate run above -- confirm
    # against the Illuminate implementation.
    stats_file = pd.read_csv("statistics.csv")
    molecules_file = pd.read_csv("molecules.csv")

    # The former "latest archive_<n>.csv" lookup was dropped: its result was
    # never returned, and max() over an empty list raised ValueError whenever
    # no archive file existed, failing an otherwise successful run.

    # NOTE(review): this removes *every* CSV file in the working directory,
    # not only the ones produced by this run.
    for entry in os.listdir('.'):
        if entry.endswith('.csv') and os.path.isfile(entry):
            os.remove(entry)

    return stats_file, molecules_file
75
-
76
def validate_and_process(target, representation, surrogate, acquisition, exact_mol_wt_min, exact_mol_wt_max, mol_log_p_min, mol_log_p_max, tpsa_min, tpsa_max, mol_mr_min, mol_mr_max, generations_max, function_calls_max, structural_filters):
    """Normalise the descriptor bounds and run the illumination.

    Each ``*_min`` / ``*_max`` pair is sorted so the smaller value always comes
    first, even if the two sliders were set the wrong way round. The four
    resulting ranges are passed to ``launch_illumination`` in the order
    molecular weight, log P, TPSA, molar refractivity.

    Returns:
        The ``(stats_file, molecules_file)`` pair from ``launch_illumination``.
    """
    bounds = (
        (exact_mol_wt_min, exact_mol_wt_max),
        (mol_log_p_min, mol_log_p_max),
        (tpsa_min, tpsa_max),
        (mol_mr_min, mol_mr_max),
    )
    # sorted() yields a two-element list ordered [low, high] for every pair.
    ranges = [sorted(pair) for pair in bounds]

    stats_file, molecules_file = launch_illumination(
        target,
        representation,
        surrogate,
        acquisition,
        ranges,
        generations_max,
        function_calls_max,
        structural_filters,
    )
    return stats_file, molecules_file
92
-
93
def gradio_interface():
    """Build the Gradio Blocks UI for the illumination run and launch it.

    The page holds the target textbox, run-budget sliders, structural-filter
    checkboxes, representation/acquisition dropdowns and a collapsed accordion
    with the four physicochemical descriptor ranges. Clicking "Submit" calls
    ``validate_and_process`` with those values and shows the two returned
    tables in the dataframes below the button.
    """
    with gr.Blocks() as demo:

        representation_options = ["ECFP4", "ECFP6", "FCFP4", "FCFP6"]
        surrogate_options = ["ECFP4", "ECFP6", "FCFP4", "FCFP6", "RDFP", "APFP", "TTFP"]
        acquisition_options = ["Mean", "UCB", "EI", "logEI"]

        target = gr.Textbox(label="Target (SMILES)", value="O=C1NC(=O)SC1Cc4ccc(OCC3(Oc2c(c(c(O)c(c2CC3)C)C)C)C)cc4")

        with gr.Row():
            generations_max = gr.Slider(minimum=0, maximum=150, value=1, step=1, label="Generations")
            function_calls_max = gr.Slider(minimum=0, maximum=15000, value=5000, step=100, label="Function Calls")

        structural_filters = gr.CheckboxGroup(["BMS", "Dundee", "Glaxo", "Inpharmatica", "LINT", "MLSMR", "PAINS", "SureChEMBL"], label="Structural Filters")

        with gr.Row():
            representation = gr.Dropdown(choices=representation_options, value="ECFP4", label="Fitness Representation")
            surrogate = gr.Dropdown(choices=surrogate_options, value="ECFP4", label="Surrogate Representation")
            acquisition = gr.Dropdown(choices=acquisition_options, value="Mean", label="Acquisition Function")

        with gr.Accordion("Physicochemical Descriptors", open=False):
            with gr.Row():
                exact_mol_wt_min = gr.Slider(minimum=0, maximum=885, value=225, step=1, label="Minimum Molecular Weight")
                exact_mol_wt_max = gr.Slider(minimum=0, maximum=885, value=555, step=1, label="Maximum Molecular Weight")
            with gr.Row():
                mol_log_p_min = gr.Slider(minimum=-4, maximum=8, value=-0.5, step=0.1, label="Minimum Log(P)")
                mol_log_p_max = gr.Slider(minimum=-4, maximum=8, value=5.5, step=0.1, label="Maximum Log(P)")
            with gr.Row():
                tpsa_min = gr.Slider(minimum=0, maximum=250, value=0, step=1, label="Minimum TPSA")
                tpsa_max = gr.Slider(minimum=0, maximum=250, value=140, step=1, label="Maximum TPSA")
            with gr.Row():
                mol_mr_min = gr.Slider(minimum=0, maximum=250, value=40, step=1, label="Minimum Molecular Refractivity")
                mol_mr_max = gr.Slider(minimum=0, maximum=250, value=130, step=1, label="Maximum Molecular Refractivity")

        # The nested plot_csv() helper that used to sit here was never
        # referenced by any component or event handler, so it was removed.

        submit_btn = gr.Button("Submit")

        output_df_1 = gr.Dataframe(label="Optimisation History")
        output_df_2 = gr.Dataframe(label="Output Molecules")

        # The order of `inputs` must match the positional parameters of
        # validate_and_process.
        submit_btn.click(
            validate_and_process,
            inputs=[
                target,
                representation,
                surrogate,
                acquisition,
                exact_mol_wt_min,
                exact_mol_wt_max,
                mol_log_p_min,
                mol_log_p_max,
                tpsa_min,
                tpsa_max,
                mol_mr_min,
                mol_mr_max,
                generations_max,
                function_calls_max,
                structural_filters,
            ],
            outputs=[output_df_1, output_df_2]
        )

    demo.launch()
161
-
162
- if __name__ == "__main__":
163
- gradio_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
illumination/functions/__pycache__/acquisition.cpython-310.pyc CHANGED
Binary files a/illumination/functions/__pycache__/acquisition.cpython-310.pyc and b/illumination/functions/__pycache__/acquisition.cpython-310.pyc differ
 
illumination/functions/__pycache__/fitness.cpython-310.pyc CHANGED
Binary files a/illumination/functions/__pycache__/fitness.cpython-310.pyc and b/illumination/functions/__pycache__/fitness.cpython-310.pyc differ
 
illumination/functions/__pycache__/surrogate.cpython-310.pyc CHANGED
Binary files a/illumination/functions/__pycache__/surrogate.cpython-310.pyc and b/illumination/functions/__pycache__/surrogate.cpython-310.pyc differ