AryanRajSaxena committed on
Commit
7cd4053
1 Parent(s): f1aadc4

application file

Browse files
Files changed (3) hide show
  1. README.md +41 -9
  2. app.py +216 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,13 +1,45 @@
1
  ---
2
- title: CDK Web
3
- emoji: 🌖
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 4.37.1
8
  app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: CDK_library
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 4.36.1
6
  ---
7
+ # Molecular Descriptor Analysis and Chemical Reaction Visualization
8
+
9
+ This repository provides a web-based interface using Gradio for performing various molecular descriptor calculations, geometric analyses, substructure checks, similarity calculations, and chemical reaction visualizations. It integrates functionalities from RDKit and CDK libraries, enabling users to interactively analyze chemical data without needing extensive programming knowledge.
10
+
11
+ ## Features
12
+
13
+ - **Calculate Descriptors:** Input SMILES strings and optionally include 3D coordinates to generate MolFile outputs and download descriptors as Excel.
14
+ - **Geometric Values:** Compute molecular weight, centroid, mean distance to centroid, and max distance to centroid for a given molecule and display its structure.
15
+ - **Check Substructure:** Check if a specified substructure is present within a molecule and visualize both structures.
16
+ - **Calculate Similarity:** Compute Tanimoto similarity coefficient between two molecules based on their SMILES strings.
17
+ - **Chemical Reaction:** Define chemical reactions using SMARTS notation, input reactant SMILES strings, and visualize the resulting reaction products interactively.
18
+
19
+ ## Usage
20
+
21
+ 1. **Setup Environment:**
22
+ - Ensure Python 3.8+ is installed (required by Gradio 4.x).
23
+ - Install required packages using `pip install -r requirements.txt`.
24
+
25
+ 2. **Run the Application:**
26
+ - Execute `python app.py` to start the Gradio interface locally.
27
+ - Access the interface at `http://localhost:7860`.
28
+
29
+ 3. **Interface Navigation:**
30
+ - Use the tabs provided to input SMILES strings and parameters for the desired chemical analysis or reaction.
31
+ - Click buttons to perform calculations or reactions and view results interactively.
32
+
33
+ 4. **Contribute:**
34
+ - Fork the repository, make your changes, and submit a pull request.
35
+ - Report any issues or suggest improvements through GitHub issues.
36
+
37
+ ## Dependencies
38
+
39
+ - RDKit: Open-source cheminformatics software.
40
+ - CDK: Chemistry Development Kit for molecular descriptor calculation.
41
+ - Gradio: User-friendly Python library for creating UIs around ML models.
42
+
43
+ ## License
44
 
45
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from CDK_pywrapper import CDK
3
+ from rdkit import Chem
4
+ from rdkit.Chem import Descriptors, Draw, AllChem, rdMolDescriptors
5
+ from rdkit.Chem.Fingerprints import FingerprintMols
6
+ from rdkit.DataStructs import TanimotoSimilarity
7
+ import pandas as pd
8
+ import numpy as np
9
+ import tempfile
10
+
11
+
12
+ # Function to convert SMILES to MolFile
13
def convert_smiles_to_mol(smiles_list, checkbox):
    """Calculate CDK descriptors for a comma-separated list of SMILES.

    Args:
        smiles_list: Comma-separated SMILES strings as typed by the user.
        checkbox: When truthy, 3D descriptors are requested from CDK.

    Returns:
        A ``(descriptors, file_path)`` pair. ``descriptors`` is the object
        returned by ``CDK.calculate`` on success, or an error message string
        on failure. ``file_path`` is the path of an ``.xlsx`` export, or
        ``None`` when nothing could be exported, so a file output component
        is never handed an error message in place of a path.
    """
    # Drop surrounding whitespace and empty entries ("CCO, CCN," is common).
    tokens = [token.strip() for token in (smiles_list or "").split(",")]
    tokens = [token for token in tokens if token]
    if not tokens:
        return "No SMILES provided", None

    try:
        mols = []
        for smiles in tokens:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                # Name the offending entry instead of failing inside AddHs.
                return f"Invalid SMILES: {smiles}", None
            mols.append(Chem.AddHs(mol))

        if checkbox:
            # NOTE(review): CDK 3D descriptors presumably need a conformer on
            # each molecule; embed one here - confirm against CDK_pywrapper.
            for mol in mols:
                AllChem.EmbedMolecule(mol)
            cdk = CDK(ignore_3D=False)
        else:
            cdk = CDK()

        descriptors = cdk.calculate(mols)
    except Exception as e:
        return str(e), None

    try:
        # Reserve a unique path, then write after the handle is closed so the
        # export also works on platforms that lock open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
            file_path = tmp.name
        descriptors.to_excel(file_path, index=False)
    except Exception:
        # Descriptors are still worth showing even if the export failed.
        return descriptors, None

    return descriptors, file_path
35
+
36
+
37
+ # Function to calculate Molecular Weight
38
def calculate_molecular_weight(smiles):
    """Return the molecular weight and a drawing for one SMILES string.

    Returns ``(weight, image)`` on success. On failure a single message
    string is returned instead: a fixed message for a ``None`` input or an
    unparsable SMILES, otherwise the text of the raised exception.
    """
    if smiles is None:
        return "SMILES string is None"

    try:
        parsed = Chem.MolFromSmiles(smiles)
        if parsed is not None:
            weight = Descriptors.MolWt(parsed)
            picture = Draw.MolToImage(parsed)
            return weight, picture
        return "Invalid SMILES"
    except Exception as err:
        return str(err)
50
+
51
def get_geometric_descriptors(smiles):
    """Compute simple 3D geometric descriptors for one SMILES string.

    The molecule is parsed, hydrogens are added, a 3D conformer is embedded
    (ETKDG) and optimised (UFF), and the descriptors are derived from the
    conformer's atom positions.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        A ``(table, image)`` pair. On success ``table`` is a transposed
        one-row DataFrame holding the molecular weight, centroid, and mean
        and max distance to the centroid, and ``image`` is a drawing of the
        molecule. On failure ``table`` is an error message and ``image`` is
        ``None``, so both outputs are always populated and an image
        component never receives a message string.
    """
    if smiles is None or not smiles.strip():
        return "SMILES string is empty", None

    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return "Invalid SMILES string", None

        # Add hydrogens and compute 3D coordinates
        mol = Chem.AddHs(mol)
        # EmbedMolecule returns a negative id when embedding fails.
        if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) < 0:
            return "Could not generate 3D coordinates for this molecule", None
        AllChem.UFFOptimizeMolecule(mol)

        # Calculate geometric descriptors
        coords = mol.GetConformer().GetPositions()
        centroid = np.round(np.mean(coords, axis=0), 12)
        distances = np.linalg.norm(coords - centroid, axis=1)

        geometric_descriptors = {
            'Molecular Weight': Descriptors.MolWt(mol),
            'Centroid': centroid.tolist(),
            'Mean Distance To Centroid': np.mean(distances),
            'Max Distance To Centroid': np.max(distances),
        }
        img = Draw.MolToImage(mol)
        df = pd.DataFrame([geometric_descriptors])
        return df.T, img
    except Exception as e:
        return str(e), None
82
+
83
+ # Function to check if a substructure is present
84
def check_substructure(smiles, substructure_smiles):
    """Check whether a substructure occurs in a molecule and draw both.

    Args:
        smiles: SMILES string of the molecule to search in.
        substructure_smiles: SMILES string of the substructure to look for.

    Returns:
        ``(molecule_image, substructure_image, message)``. Both images are
        ``None`` when either input is missing or unparsable, or when parsing,
        matching or drawing raises; image slots never carry a text
        placeholder.
    """
    invalid = (None, None, "Invalid SMILES string provided.")
    if not smiles or not substructure_smiles:
        return invalid
    if not smiles.strip() or not substructure_smiles.strip():
        return invalid

    try:
        # Convert the SMILES strings to RDKit molecule objects
        molecule = Chem.MolFromSmiles(smiles)
        substructure = Chem.MolFromSmiles(substructure_smiles)
        if molecule is None or substructure is None:
            return invalid

        # Use RDKit's HasSubstructMatch to check for the substructure
        is_present = molecule.HasSubstructMatch(substructure)

        # Each structure is parsed and drawn once; both drawings are returned
        # whether or not the match succeeds.
        img1 = Draw.MolToImage(molecule)
        img2 = Draw.MolToImage(substructure)
    except Exception as e:
        return None, None, str(e)

    if is_present:
        return img1, img2, "Substructure is present."
    return img1, img2, "Substructure is not present."
108
+
109
def calculate_similarity(smiles1, smiles2):
    """Return the Tanimoto similarity of two molecules given as SMILES.

    Both molecules are converted to 2048-bit Morgan fingerprints (radius 2)
    and compared with ``TanimotoSimilarity``.

    Args:
        smiles1: SMILES string of the first molecule.
        smiles2: SMILES string of the second molecule.

    Returns:
        The similarity value produced by ``TanimotoSimilarity``.

    Raises:
        gr.Error: If either SMILES is missing or unparsable, or if the
            fingerprint calculation fails. The result is wired to a numeric
            output, so failures are raised rather than returned as text the
            output cannot represent.
    """
    if not (smiles1 and smiles1.strip()) or not (smiles2 and smiles2.strip()):
        raise gr.Error("Invalid SMILES string")

    try:
        mol1 = Chem.MolFromSmiles(smiles1)
        mol2 = Chem.MolFromSmiles(smiles2)

        if mol1 is None or mol2 is None:
            raise gr.Error("Invalid SMILES string")

        fp1 = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol1, radius=2, nBits=2048)
        fp2 = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol2, radius=2, nBits=2048)

        return TanimotoSimilarity(fp1, fp2)
    except gr.Error:
        # Already a user-facing error; do not re-wrap it.
        raise
    except Exception as e:
        raise gr.Error(str(e)) from e
124
+
125
def perform_reaction(reactant1_smiles, reactant2_smiles, reaction_smarts):
    """Run a two-reactant reaction and draw reactants and products.

    Args:
        reactant1_smiles: SMILES string of the first reactant.
        reactant2_smiles: SMILES string of the second reactant.
        reaction_smarts: Reaction SMARTS describing the transformation.

    Returns:
        ``(message, image)``. ``image`` is a grid drawing of the reactants
        followed by every product, or ``None`` when the inputs are unusable
        or an exception is raised. ``message`` reports success, the absence
        of products, or the error text.
    """
    if not reaction_smarts or not reaction_smarts.strip():
        return "Reaction SMARTS is empty", None
    if not (reactant1_smiles and reactant1_smiles.strip()) or not (
        reactant2_smiles and reactant2_smiles.strip()
    ):
        return "Invalid SMILES string(s)", None

    try:
        # Define the reaction using SMARTS provided by the user
        reaction = AllChem.ReactionFromSmarts(reaction_smarts)

        # Convert SMILES to RDKit molecules
        reactant1 = Chem.MolFromSmiles(reactant1_smiles)
        reactant2 = Chem.MolFromSmiles(reactant2_smiles)

        if reactant1 is None or reactant2 is None:
            return "Invalid SMILES string(s)", None

        # Run the reaction
        product_sets = reaction.RunReactants((reactant1, reactant2))

        # Create a grid image of reactants and products
        all_mols = [reactant1, reactant2]
        legends = ["Reactant 1", "Reactant 2"]
        for i, product_set in enumerate(product_sets):
            for j, product in enumerate(product_set):
                # Sanitize each product before drawing; if that fails, fall
                # back to a lenient property update so it can still be shown.
                try:
                    Chem.SanitizeMol(product)
                except ValueError:
                    product.UpdatePropertyCache(strict=False)
                all_mols.append(product)
                legends.append(f'Product {i+1}.{j+1}')

        img = Draw.MolsToGridImage(all_mols, molsPerRow=4, subImgSize=(300, 300), legends=legends)

        if not product_sets:
            # The reactants did not match the reaction templates.
            return "No products formed for the given reactants", img
        return "Reaction Successful", img

    except Exception as e:
        return str(e), None
153
+
154
+ # Gradio Interface
155
def generate_reaction_image(reaction_smarts,reactant1_smiles, reactant2_smiles):
    """Adapter for the UI: takes the SMARTS first, then the two reactants.

    Forwards to ``perform_reaction`` (which expects the reactants first) and
    returns its ``(message, image)`` pair.
    """
    outcome = perform_reaction(reactant1_smiles, reactant2_smiles, reaction_smarts)
    message, picture = outcome
    return message, picture
158
+
159
+
160
+
161
+
162
+ # Gradio Interface
163
# Gradio interface: one tab per analysis. Each tab uses its own component
# variable names so no tab rebinds a name another tab's handler was wired to.
with gr.Blocks(theme='earneleh/paris') as demo:
    gr.Markdown("### CDK Functionality with Gradio Interface")

    with gr.Tab("Calculate Descriptors"):
        smiles_input = gr.Textbox(label="SMILES", info="Enter SMILES separated by comma")
        checkbox = gr.Checkbox(label="Include 3D Coordinates")
        molfile_output = gr.Textbox(label="MolFile", lines=10)
        convert_button = gr.Button("Calculate")
        download_link = gr.File(label="Download Descriptors as Excel")
        convert_button.click(
            fn=convert_smiles_to_mol,
            inputs=[smiles_input, checkbox],
            outputs=[molfile_output, download_link],
        )

    with gr.Tab("Geometric Values"):
        with gr.Row():
            with gr.Column(min_width=800):
                smiles_input_mw = gr.Textbox(label="SMILE")
                weight_output = gr.TextArea(label="Geometric Values", lines=8, show_copy_button=True)
                geometry_button = gr.Button("Calculate")
            with gr.Column():
                geometry_image = gr.Image(label="Molecular Structure", height=400, width=500)
        geometry_button.click(
            fn=get_geometric_descriptors,
            inputs=smiles_input_mw,
            outputs=[weight_output, geometry_image],
        )

    with gr.Tab("Check Substructure"):
        with gr.Row():
            with gr.Column():
                smiles_input_sub = gr.Textbox(label="SMILES")
                substructure_input = gr.Textbox(label="Substructure SMILES")
                substructure_output = gr.Label(label="Is Substructure Present?")
                check_button = gr.Button("Check")
            with gr.Column():
                image_output1 = gr.Image(label="Molecular Structure", height=350, width=500)
                image_output2 = gr.Image(label="Sub_Molecular Structure", height=350, width=500)
        check_button.click(
            fn=check_substructure,
            inputs=[smiles_input_sub, substructure_input],
            outputs=[image_output1, image_output2, substructure_output],
        )

    with gr.Tab("Calculate Similarity"):
        similarity_smiles1 = gr.Textbox(label="SMILES 1")
        similarity_smiles2 = gr.Textbox(label="SMILES 2")
        similarity_output = gr.Number(label="Similarity (Tanimoto)")
        calculate_button_sim = gr.Button("Calculate Similarity")
        calculate_button_sim.click(
            fn=calculate_similarity,
            inputs=[similarity_smiles1, similarity_smiles2],
            outputs=similarity_output,
        )

    with gr.Tab("Chemical Reaction"):
        reaction_smarts_input = gr.Textbox(label="Reaction SMARTS",value="[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]")
        reactant1_input = gr.Textbox(label="Reactant 1 SMILES", value="OC=O")
        reactant2_input = gr.Textbox(label="Reactant 2 SMILES", value= "NCC")
        reaction_button = gr.Button("Perform Reaction")

        result_output = gr.Label(label="Result")
        reaction_image = gr.Image(label="Reaction Image", interactive=True)
        reaction_button.click(
            fn=generate_reaction_image,
            inputs=[reaction_smarts_input, reactant1_input, reactant2_input],
            outputs=[result_output, reaction_image],
        )


# Launch Gradio Interface only when run as a script, so importing this module
# (for reuse or testing) does not start a server.
if __name__ == "__main__":
    demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ CDK-pywrapper
2
+ gradio
3
+ rdkit
4
+ pandas
5
+ numpy
6
+ openpyxl