Spaces:
Running
Running
AryanRajSaxena
commited on
Commit
•
7cd4053
1
Parent(s):
f1aadc4
application file
Browse files- README.md +41 -9
- app.py +216 -0
- requirements.txt +6 -0
README.md
CHANGED
@@ -1,13 +1,45 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji: 🌖
|
4 |
-
colorFrom: indigo
|
5 |
-
colorTo: indigo
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 4.37.1
|
8 |
app_file: app.py
|
9 |
-
|
10 |
-
|
11 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
|
|
|
1 |
---
|
2 |
+
title: CDK_library
|
|
|
|
|
|
|
|
|
|
|
3 |
app_file: app.py
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 4.36.1
|
6 |
---
|
7 |
+
# Molecular Descriptor Analysis and Chemical Reaction Visualization
|
8 |
+
|
9 |
+
This repository provides a web-based interface using Gradio for performing various molecular descriptor calculations, geometric analyses, substructure checks, similarity calculations, and chemical reaction visualizations. It integrates functionalities from RDKit and CDK libraries, enabling users to interactively analyze chemical data without needing extensive programming knowledge.
|
10 |
+
|
11 |
+
## Features
|
12 |
+
|
13 |
+
- **Calculate Descriptors:** Enter one or more comma-separated SMILES strings, optionally request 3D descriptors, view the computed CDK descriptor table, and download it as an Excel file.
|
14 |
+
- **Geometric Values:** Compute molecular weight, centroid, mean distance to centroid, and max distance to centroid for a given molecule and display its structure.
|
15 |
+
- **Check Substructure:** Check if a specified substructure is present within a molecule and visualize both structures.
|
16 |
+
- **Calculate Similarity:** Compute the Tanimoto similarity coefficient between two molecules, given as SMILES strings, using Morgan fingerprints (radius 2, 2048 bits).
|
17 |
+
- **Chemical Reaction:** Define chemical reactions using SMARTS notation, input reactant SMILES strings, and visualize the resulting reaction products interactively.
|
18 |
+
|
19 |
+
## Usage
|
20 |
+
|
21 |
+
1. **Setup Environment:**
|
22 |
+
- Ensure Python 3.8+ is installed (Gradio 4.x does not support older versions).
|
23 |
+
- Install required packages using `pip install -r requirements.txt`.
|
24 |
+
|
25 |
+
2. **Run the Application:**
|
26 |
+
- Execute `python app.py` to start the Gradio interface locally.
|
27 |
+
- Access the interface at `http://localhost:7860`.
|
28 |
+
|
29 |
+
3. **Interface Navigation:**
|
30 |
+
- Use the tabs provided to input SMILES strings and parameters for the desired chemical analysis or reaction.
|
31 |
+
- Click buttons to perform calculations or reactions and view results interactively.
|
32 |
+
|
33 |
+
4. **Contribute:**
|
34 |
+
- Fork the repository, make your changes, and submit a pull request.
|
35 |
+
- Report any issues or suggest improvements through GitHub issues.
|
36 |
+
|
37 |
+
## Dependencies
|
38 |
+
|
39 |
+
- RDKit: Open-source cheminformatics software.
|
40 |
+
- CDK: Chemistry Development Kit for molecular descriptor calculation.
|
41 |
+
- Gradio: User-friendly Python library for creating UIs around ML models.
|
42 |
+
|
43 |
+
## License
|
44 |
|
45 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
app.py
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from CDK_pywrapper import CDK
|
3 |
+
from rdkit import Chem
|
4 |
+
from rdkit.Chem import Descriptors, Draw, AllChem, rdMolDescriptors
|
5 |
+
from rdkit.Chem.Fingerprints import FingerprintMols
|
6 |
+
from rdkit.DataStructs import TanimotoSimilarity
|
7 |
+
import pandas as pd
|
8 |
+
import numpy as np
|
9 |
+
import tempfile
|
10 |
+
|
11 |
+
|
12 |
+
# Function to compute CDK descriptors for comma-separated SMILES and export them to Excel
|
13 |
+
def convert_smiles_to_mol(smiles_list, checkbox):
    """Compute CDK descriptors for comma-separated SMILES and export them to Excel.

    Args:
        smiles_list: Comma-separated SMILES strings as typed by the user.
            Whitespace around each entry and empty entries are ignored.
        checkbox: Truthy to request 3D descriptors (``CDK(ignore_3D=False)``);
            a conformer is embedded for every molecule in that case.

    Returns:
        A 2-tuple ``(result, file_path)``.  On success ``result`` is whatever
        ``cdk.calculate`` returned and ``file_path`` is the path of a
        temporary ``.xlsx`` file holding it.  On failure ``result`` is an
        error message and ``file_path`` is ``None`` (never a message string,
        because the second value feeds a file-download component).
    """
    # Validate the text before starting the CDK backend so that blank input
    # is rejected cheaply and with a clear message.
    tokens = [token.strip() for token in (smiles_list or "").split(',')]
    tokens = [token for token in tokens if token]
    if not tokens:
        return "No SMILES provided.", None

    try:
        mols = []
        for token in tokens:
            parsed = Chem.MolFromSmiles(token)
            if parsed is None:
                # MolFromSmiles signals a parse failure with None, which
                # AddHs would otherwise turn into an opaque exception.
                return f"Invalid SMILES: {token}", None
            mols.append(Chem.AddHs(parsed))

        if checkbox:
            # 3D descriptors need a conformer on every molecule.
            for token, mol in zip(tokens, mols):
                if AllChem.EmbedMolecule(mol) < 0:
                    return f"Could not generate 3D coordinates for: {token}", None
            cdk = CDK(ignore_3D=False)
        else:
            cdk = CDK()

        descriptors = cdk.calculate(mols)
    except Exception as e:
        return str(e), None

    try:
        # Reserve a unique path, then close the handle before writing so the
        # export also works on platforms that lock open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
            file_path = tmp.name
        descriptors.to_excel(file_path, index=False)
    except Exception as e:
        # Descriptors were computed; only the download is unavailable.
        return f"{descriptors}\n\nExcel export failed: {e}", None

    return descriptors, file_path
|
35 |
+
|
36 |
+
|
37 |
+
# Function to calculate Molecular Weight
|
38 |
+
def calculate_molecular_weight(smiles):
    """Return the molecular weight and a rendered image for a SMILES string.

    Args:
        smiles: SMILES string of the molecule, or ``None``.

    Returns:
        ``(molecular_weight, image)`` on success.  On failure a single
        message string is returned instead of a tuple: for ``None`` input,
        blank input, an unparsable SMILES, or any exception raised while
        computing the weight or drawing the molecule.
    """
    if smiles is None:
        return "SMILES string is None"
    # A blank string parses to an empty molecule, which would otherwise be
    # reported as a valid molecule of weight 0.
    if isinstance(smiles, str) and not smiles.strip():
        return "SMILES string is empty"
    try:
        molecule = Chem.MolFromSmiles(smiles)
        if molecule is None:
            return "Invalid SMILES"
        return Descriptors.MolWt(molecule), Draw.MolToImage(molecule)
    except Exception as e:
        return str(e)
|
50 |
+
|
51 |
+
def get_geometric_descriptors(smiles):
    """Compute simple geometric descriptors from an embedded 3D conformer.

    The molecule is parsed, hydrogens are added, a conformer is embedded
    with ETKDG and relaxed with UFF; the descriptors are derived from the
    resulting atom coordinates.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        A 2-tuple ``(table, image)``.  On success ``table`` is a transposed
        one-row ``DataFrame`` holding molecular weight, centroid, mean and
        max atom distance to the centroid, and ``image`` is the rendered
        molecule.  On failure ``table`` is an error message and ``image`` is
        ``None``, so both callback outputs always receive a valid value.
    """
    if smiles is None or not str(smiles).strip():
        return "Invalid SMILES string", None

    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return "Invalid SMILES string", None

        # Add hydrogens and compute 3D coordinates
        mol = Chem.AddHs(mol)
        # EmbedMolecule reports failure through a negative conformer id
        # rather than an exception.
        if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) < 0:
            return "Could not generate 3D coordinates for this molecule", None
        AllChem.UFFOptimizeMolecule(mol)

        # Calculate geometric descriptors
        coords = mol.GetConformer().GetPositions()
        centroid = np.round(np.mean(coords, axis=0), 12)
        distances = np.linalg.norm(coords - centroid, axis=1)

        geometric_descriptors = {
            'Molecular Weight': Descriptors.MolWt(mol),
            'Centroid': centroid.tolist(),
            'Mean Distance To Centroid': np.mean(distances),
            'Max Distance To Centroid': np.max(distances),
        }
        img = Draw.MolToImage(mol)
        df = pd.DataFrame([geometric_descriptors])
        return df.T, img
    except Exception as e:
        # The second output is an image component; a message string there
        # would be treated as a file path, so clear it instead.
        return str(e), None
|
82 |
+
|
83 |
+
# Function to check if a substructure is present
|
84 |
+
def check_substructure(smiles, substructure_smiles):
    """Report whether a substructure occurs in a molecule and render both.

    Args:
        smiles: SMILES string of the molecule to search in.
        substructure_smiles: SMILES string of the substructure to look for.

    Returns:
        A 3-tuple ``(molecule_image, substructure_image, message)``.  Both
        images are rendered whenever the inputs parse, whether or not the
        substructure is present.  If an input is blank or unparsable, or
        drawing fails, the image slots are ``None``: they feed image
        components, which cannot display placeholder text.
    """
    invalid = (None, None, "Invalid SMILES string provided.")
    if not smiles or not smiles.strip():
        return invalid
    if not substructure_smiles or not substructure_smiles.strip():
        return invalid

    # Convert the SMILES strings to RDKit molecule objects
    molecule = Chem.MolFromSmiles(smiles)
    substructure = Chem.MolFromSmiles(substructure_smiles)
    if molecule is None or substructure is None:
        return invalid

    if molecule.HasSubstructMatch(substructure):
        message = "Substructure is present."
    else:
        message = "Substructure is not present."

    try:
        img1 = Draw.MolToImage(molecule)
        img2 = Draw.MolToImage(substructure)
    except Exception:
        # The verdict is still valid even if rendering failed.
        return None, None, message

    return img1, img2, message
|
108 |
+
|
109 |
+
def calculate_similarity(smiles1, smiles2):
    """Return the Tanimoto similarity of two molecules given as SMILES.

    Both molecules are encoded as 2048-bit Morgan fingerprints of radius 2.

    Args:
        smiles1: SMILES string of the first molecule.
        smiles2: SMILES string of the second molecule.

    Returns:
        The Tanimoto coefficient of the two fingerprints.

    Raises:
        gr.Error: If either SMILES is blank or cannot be parsed, or if the
            fingerprint calculation fails.  The result feeds a numeric
            output component, which cannot display a message string, so
            failures are surfaced through Gradio's error mechanism.
    """
    mols = []
    for text in (smiles1, smiles2):
        is_usable = isinstance(text, str) and text.strip()
        mol = Chem.MolFromSmiles(text) if is_usable else None
        if mol is None:
            raise gr.Error("Invalid SMILES string")
        mols.append(mol)

    try:
        fp1, fp2 = (
            rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=2048)
            for mol in mols
        )
        return TanimotoSimilarity(fp1, fp2)
    except Exception as e:
        raise gr.Error(str(e)) from e
|
124 |
+
|
125 |
+
def perform_reaction(reactant1_smiles, reactant2_smiles, reaction_smarts):
    """Apply a two-reactant reaction SMARTS and draw reactants and products.

    Args:
        reactant1_smiles: SMILES string of the first reactant.
        reactant2_smiles: SMILES string of the second reactant.
        reaction_smarts: Reaction definition in SMARTS notation.

    Returns:
        A 2-tuple ``(status, image)``.  ``image`` is a grid showing both
        reactants followed by every generated product, or ``None`` when the
        inputs are invalid or an exception occurred (``status`` then holds
        the message).  When the reaction matches nothing, the grid shows
        only the reactants and ``status`` says so.
    """
    reactants_text = (reactant1_smiles, reactant2_smiles)
    # Blank SMILES parse to empty molecules rather than None, so reject
    # them explicitly.
    if any(text is None or not str(text).strip() for text in reactants_text):
        return "Invalid SMILES string(s)", None
    if reaction_smarts is None or not str(reaction_smarts).strip():
        return "Invalid reaction SMARTS", None

    try:
        # Define the reaction using SMARTS provided by the user
        reaction = AllChem.ReactionFromSmarts(reaction_smarts)

        # Convert SMILES to RDKit molecules
        reactant1 = Chem.MolFromSmiles(reactant1_smiles)
        reactant2 = Chem.MolFromSmiles(reactant2_smiles)
        if reactant1 is None or reactant2 is None:
            return "Invalid SMILES string(s)", None

        # Run the reaction
        products = reaction.RunReactants((reactant1, reactant2))

        # Create a grid image of reactants and products
        all_mols = [reactant1, reactant2]
        legends = ["Reactant 1", "Reactant 2"]
        for i, product_set in enumerate(products):
            for j, product in enumerate(product_set):
                legend = f'Product {i+1}.{j+1}'
                # Reaction products come back unsanitized; sanitize so
                # valences, aromaticity and ring info are set before drawing.
                try:
                    Chem.SanitizeMol(product)
                except Exception:
                    legend += " (not sanitized)"
                all_mols.append(product)
                legends.append(legend)

        img = Draw.MolsToGridImage(all_mols, molsPerRow=4, subImgSize=(300, 300), legends=legends)
        status = "Reaction Successful" if products else "Reaction produced no products"
        return status, img

    except Exception as e:
        return str(e), None
|
153 |
+
|
154 |
+
# Callback adapter: reorders the UI inputs (SMARTS first) for perform_reaction
|
155 |
+
def generate_reaction_image(reaction_smarts, reactant1_smiles, reactant2_smiles):
    """Run ``perform_reaction`` with arguments in the order the UI supplies them.

    The interface passes the reaction SMARTS first, whereas
    ``perform_reaction`` expects it last; this adapter only reorders them.

    Returns:
        The ``(status, image)`` pair produced by ``perform_reaction``.
    """
    outcome = perform_reaction(
        reactant1_smiles=reactant1_smiles,
        reactant2_smiles=reactant2_smiles,
        reaction_smarts=reaction_smarts,
    )
    status, picture = outcome
    return status, picture
|
158 |
+
|
159 |
+
|
160 |
+
|
161 |
+
|
162 |
+
# Gradio Interface
|
163 |
+
# Build the tabbed interface. Every component gets its own variable name so
# that no tab silently rebinds a component belonging to another tab.
with gr.Blocks(theme='earneleh/paris') as demo:
    gr.Markdown("### CDK Functionality with Gradio Interface")

    # Tab 1: CDK descriptor table plus Excel download.
    with gr.Tab("Calculate Descriptors"):
        smiles_input = gr.Textbox(label="SMILES", info="Enter SMILES separated by comma")
        checkbox = gr.Checkbox(label="Include 3D Coordinates")
        molfile_output = gr.Textbox(label="MolFile", lines=10)
        convert_button = gr.Button("Calculate")
        download_link = gr.File(label="Download Descriptors as Excel")
        convert_button.click(
            fn=convert_smiles_to_mol,
            inputs=[smiles_input, checkbox],
            outputs=[molfile_output, download_link],
        )

    # Tab 2: geometric descriptors with the rendered structure alongside.
    with gr.Tab("Geometric Values"):
        with gr.Row():
            with gr.Column(min_width=800):
                smiles_input_mw = gr.Textbox(label="SMILE")
                weight_output = gr.TextArea(label="Geometric Values", lines=8, show_copy_button=True)
                geometry_button = gr.Button("Calculate")
            with gr.Column():
                geometry_image = gr.Image(label="Molecular Structure", height=400, width=500)
        geometry_button.click(
            fn=get_geometric_descriptors,
            inputs=smiles_input_mw,
            outputs=[weight_output, geometry_image],
        )

    # Tab 3: substructure search with both structures drawn.
    with gr.Tab("Check Substructure"):
        with gr.Row():
            with gr.Column():
                smiles_input_sub = gr.Textbox(label="SMILES")
                substructure_input = gr.Textbox(label="Substructure SMILES")
                substructure_output = gr.Label(label="Is Substructure Present?")
                check_button = gr.Button("Check")
            with gr.Column():
                image_output1 = gr.Image(label="Molecular Structure", height=350, width=500)
                image_output2 = gr.Image(label="Sub_Molecular Structure", height=350, width=500)
        check_button.click(
            fn=check_substructure,
            inputs=[smiles_input_sub, substructure_input],
            outputs=[image_output1, image_output2, substructure_output],
        )

    # Tab 4: Tanimoto similarity of two molecules.
    with gr.Tab("Calculate Similarity"):
        similarity_smiles1 = gr.Textbox(label="SMILES 1")
        similarity_smiles2 = gr.Textbox(label="SMILES 2")
        similarity_output = gr.Number(label="Similarity (Tanimoto)")
        calculate_button_sim = gr.Button("Calculate Similarity")
        calculate_button_sim.click(
            fn=calculate_similarity,
            inputs=[similarity_smiles1, similarity_smiles2],
            outputs=similarity_output,
        )

    # Tab 5: run a reaction SMARTS on two reactants; pre-filled with an
    # amide-formation example.
    with gr.Tab("Chemical Reaction"):
        reaction_smarts_input = gr.Textbox(label="Reaction SMARTS", value="[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]")
        reactant1_input = gr.Textbox(label="Reactant 1 SMILES", value="OC=O")
        reactant2_input = gr.Textbox(label="Reactant 2 SMILES", value="NCC")
        reaction_button = gr.Button("Perform Reaction")

        result_output = gr.Label(label="Result")
        reaction_image = gr.Image(label="Reaction Image", interactive=True)
        reaction_button.click(
            fn=generate_reaction_image,
            inputs=[reaction_smarts_input, reactant1_input, reactant2_input],
            outputs=[result_output, reaction_image],
        )


# Launch Gradio Interface
# No share=True: the hosting platform already serves the app publicly, and on
# a local run a share link would expose the machine through a public tunnel.
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
CDK-pywrapper
|
2 |
+
gradio
|
3 |
+
rdkit
|
4 |
+
pandas
|
5 |
+
numpy
|
6 |
+
openpyxl
|