Spaces:
Sleeping
Sleeping
import os,sys | |
from openai import OpenAI | |
import gradio as gr | |
# install required packages | |
os.system('pip install -q plotly') | |
os.system('pip install -q matplotlib') | |
os.system('pip install dgl==1.0.2+cu116 -f https://data.dgl.ai/wheels/cu116/repo.html') | |
os.environ["DGLBACKEND"] = "pytorch" | |
print('Modules installed') | |
# ํ์ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ์ํฌํธ | |
from datasets import load_dataset | |
import plotly.graph_objects as go | |
import numpy as np | |
import py3Dmol | |
from io import StringIO | |
import json | |
import secrets | |
import copy | |
import matplotlib.pyplot as plt | |
from utils.sampler import HuggingFace_sampler | |
from utils.parsers_inference import parse_pdb | |
from model.util import writepdb | |
from utils.inpainting_util import * | |
# Hugging Face ํ ํฐ ์ค์ | |
ACCESS_TOKEN = os.getenv("HF_TOKEN") | |
if not ACCESS_TOKEN: | |
raise ValueError("HF_TOKEN not found in environment variables") | |
# OpenAI ํด๋ผ์ด์ธํธ ์ค์ (Hugging Face ์๋ํฌ์ธํธ ์ฌ์ฉ) | |
client = OpenAI( | |
base_url="https://api-inference.huggingface.co/v1/", | |
api_key=ACCESS_TOKEN, | |
) | |
# ๋ฐ์ดํฐ์ ๋ก๋ | |
ds = load_dataset("lamm-mit/protein_secondary_structure_from_PDB", | |
token=ACCESS_TOKEN) | |
def respond( | |
message, | |
history: list[tuple[str, str]], | |
system_message, | |
max_tokens, | |
temperature, | |
top_p, | |
): | |
messages = [{"role": "system", "content": system_message}] | |
for val in history: | |
if val[0]: | |
messages.append({"role": "user", "content": val[0]}) | |
if val[1]: | |
messages.append({"role": "assistant", "content": val[1]}) | |
messages.append({"role": "user", "content": message}) | |
response = "" | |
for message in client.chat.completions.create( | |
model="CohereForAI/c4ai-command-r-plus-08-2024", | |
max_tokens=max_tokens, | |
stream=True, | |
temperature=temperature, | |
top_p=top_p, | |
messages=messages, | |
): | |
token = message.choices[0].delta.content | |
response += token | |
yield response | |
# ์ฑ๋ด ๋ฐ ๋จ๋ฐฑ์ง ์์ฑ ๊ด๋ จ ํจ์๋ค | |
def process_chat(message, history): | |
messages = [{"role": "user", "content": message}] | |
response = pipe(messages)[0]['generated_text'] | |
if any(keyword in message.lower() for keyword in ['protein', 'generate', '๋จ๋ฐฑ์ง', '์์ฑ']): | |
relevant_data = search_protein_data(message) | |
params = extract_parameters(response, relevant_data) | |
protein_result = generate_protein(params) | |
explanation = generate_explanation(protein_result, params) | |
return response + "\n\n" + explanation | |
return response | |
def search_protein_data(query): | |
relevant_entries = [] | |
for entry in ds['train']: | |
if any(keyword in entry['sequence'].lower() for keyword in query.lower().split()): | |
relevant_entries.append(entry) | |
return relevant_entries | |
def extract_parameters(llm_response, dataset_info): | |
params = { | |
'sequence_length': 100, | |
'helix_bias': 0.02, | |
'strand_bias': 0.02, | |
'loop_bias': 0.1, | |
'hydrophobic_target_score': 0 | |
} | |
return params | |
def generate_explanation(result, params): | |
explanation = f""" | |
์์ฑ๋ ๋จ๋ฐฑ์ง ๋ถ์: | |
- ๊ธธ์ด: {params['sequence_length']} ์๋ฏธ๋ ธ์ฐ | |
- ๊ตฌ์กฐ์ ํน์ง: | |
* ์ํ ๋์ ๋น์จ: {params['helix_bias']*100}% | |
* ๋ฒ ํ ์ํธ ๋น์จ: {params['strand_bias']*100}% | |
* ๋ฃจํ ๊ตฌ์กฐ ๋น์จ: {params['loop_bias']*100}% | |
- ํน์ ๊ธฐ๋ฅ: {result.get('special_features', '์์')} | |
""" | |
return explanation | |
def protein_diffusion_model(sequence, seq_len, helix_bias, strand_bias, loop_bias, | |
secondary_structure, aa_bias, aa_bias_potential, | |
num_steps, noise, hydrophobic_target_score, hydrophobic_potential, | |
contigs, pssm, seq_mask, str_mask, rewrite_pdb): | |
dssp_checkpoint = './SEQDIFF_230205_dssp_hotspots_25mask_EQtasks_mod30.pt' | |
og_checkpoint = './SEQDIFF_221219_equalTASKS_nostrSELFCOND_mod30.pt' | |
model_args = copy.deepcopy(args) | |
# make sampler | |
S = HuggingFace_sampler(args=model_args) | |
# get random prefix | |
S.out_prefix = './tmp/'+secrets.token_hex(nbytes=10).upper() | |
# set args | |
S.args['checkpoint'] = None | |
S.args['dump_trb'] = False | |
S.args['dump_args'] = True | |
S.args['save_best_plddt'] = True | |
S.args['T'] = 20 | |
S.args['strand_bias'] = 0.0 | |
S.args['loop_bias'] = 0.0 | |
S.args['helix_bias'] = 0.0 | |
S.args['potentials'] = None | |
S.args['potential_scale'] = None | |
S.args['aa_composition'] = None | |
# get sequence if entered and make sure all chars are valid | |
alt_aa_dict = {'B':['D','N'],'J':['I','L'],'U':['C'],'Z':['E','Q'],'O':['K']} | |
if sequence not in ['',None]: | |
L = len(sequence) | |
aa_seq = [] | |
for aa in sequence.upper(): | |
if aa in alt_aa_dict.keys(): | |
aa_seq.append(np.random.choice(alt_aa_dict[aa])) | |
else: | |
aa_seq.append(aa) | |
S.args['sequence'] = aa_seq | |
elif contigs not in ['',None]: | |
S.args['contigs'] = [contigs] | |
else: | |
S.args['contigs'] = [f'{seq_len}'] | |
L = int(seq_len) | |
print('DEBUG: ',rewrite_pdb) | |
if rewrite_pdb not in ['',None]: | |
S.args['pdb'] = rewrite_pdb.name | |
if seq_mask not in ['',None]: | |
S.args['inpaint_seq'] = [seq_mask] | |
if str_mask not in ['',None]: | |
S.args['inpaint_str'] = [str_mask] | |
if secondary_structure in ['',None]: | |
secondary_structure = None | |
else: | |
secondary_structure = ''.join(['E' if x == 'S' else x for x in secondary_structure]) | |
if L < len(secondary_structure): | |
secondary_structure = secondary_structure[:len(sequence)] | |
elif L == len(secondary_structure): | |
pass | |
else: | |
dseq = L - len(secondary_structure) | |
secondary_structure += secondary_structure[-1]*dseq | |
# potentials | |
potential_list = [] | |
potential_bias_list = [] | |
if aa_bias not in ['',None]: | |
potential_list.append('aa_bias') | |
S.args['aa_composition'] = aa_bias | |
if aa_bias_potential in ['',None]: | |
aa_bias_potential = 3 | |
potential_bias_list.append(str(aa_bias_potential)) | |
''' | |
if target_charge not in ['',None]: | |
potential_list.append('charge') | |
if charge_potential in ['',None]: | |
charge_potential = 1 | |
potential_bias_list.append(str(charge_potential)) | |
S.args['target_charge'] = float(target_charge) | |
if target_ph in ['',None]: | |
target_ph = 7.4 | |
S.args['target_pH'] = float(target_ph) | |
''' | |
if hydrophobic_target_score not in ['',None]: | |
potential_list.append('hydrophobic') | |
S.args['hydrophobic_score'] = float(hydrophobic_target_score) | |
if hydrophobic_potential in ['',None]: | |
hydrophobic_potential = 3 | |
potential_bias_list.append(str(hydrophobic_potential)) | |
if pssm not in ['',None]: | |
potential_list.append('PSSM') | |
potential_bias_list.append('5') | |
S.args['PSSM'] = pssm.name | |
if len(potential_list) > 0: | |
S.args['potentials'] = ','.join(potential_list) | |
S.args['potential_scale'] = ','.join(potential_bias_list) | |
# normalise secondary_structure bias from range 0-0.3 | |
S.args['secondary_structure'] = secondary_structure | |
S.args['helix_bias'] = helix_bias | |
S.args['strand_bias'] = strand_bias | |
S.args['loop_bias'] = loop_bias | |
# set T | |
if num_steps in ['',None]: | |
S.args['T'] = 20 | |
else: | |
S.args['T'] = int(num_steps) | |
# noise | |
if 'normal' in noise: | |
S.args['sample_distribution'] = noise | |
S.args['sample_distribution_gmm_means'] = [0] | |
S.args['sample_distribution_gmm_variances'] = [1] | |
elif 'gmm2' in noise: | |
S.args['sample_distribution'] = noise | |
S.args['sample_distribution_gmm_means'] = [-1,1] | |
S.args['sample_distribution_gmm_variances'] = [1,1] | |
elif 'gmm3' in noise: | |
S.args['sample_distribution'] = noise | |
S.args['sample_distribution_gmm_means'] = [-1,0,1] | |
S.args['sample_distribution_gmm_variances'] = [1,1,1] | |
if secondary_structure not in ['',None] or helix_bias+strand_bias+loop_bias > 0: | |
S.args['checkpoint'] = dssp_checkpoint | |
S.args['d_t1d'] = 29 | |
print('using dssp checkpoint') | |
else: | |
S.args['checkpoint'] = og_checkpoint | |
S.args['d_t1d'] = 24 | |
print('using og checkpoint') | |
for k,v in S.args.items(): | |
print(f"{k} --> {v}") | |
# init S | |
S.model_init() | |
S.diffuser_init() | |
S.setup() | |
# sampling loop | |
plddt_data = [] | |
for j in range(S.max_t): | |
print(f'on step {j}') | |
output_seq, output_pdb, plddt = S.take_step_get_outputs(j) | |
plddt_data.append(plddt) | |
yield output_seq, output_pdb, display_pdb(output_pdb), get_plddt_plot(plddt_data, S.max_t) | |
output_seq, output_pdb, plddt = S.get_outputs() | |
return output_seq, output_pdb, display_pdb(output_pdb), get_plddt_plot(plddt_data, S.max_t) | |
def get_plddt_plot(plddt_data, max_t): | |
x = [i+1 for i in range(len(plddt_data))] | |
fig, ax = plt.subplots(figsize=(15,6)) | |
ax.plot(x,plddt_data,color='#661dbf', linewidth=3,marker='o') | |
ax.set_xticks([i+1 for i in range(max_t)]) | |
ax.set_yticks([(i+1)/10 for i in range(10)]) | |
ax.set_ylim([0,1]) | |
ax.set_ylabel('model confidence (plddt)') | |
ax.set_xlabel('diffusion steps (t)') | |
return fig | |
def display_pdb(path_to_pdb): | |
''' | |
#function to display pdb in py3dmol | |
''' | |
pdb = open(path_to_pdb, "r").read() | |
view = py3Dmol.view(width=500, height=500) | |
view.addModel(pdb, "pdb") | |
view.setStyle({'model': -1}, {"cartoon": {'colorscheme':{'prop':'b','gradient':'roygb','min':0,'max':1}}})#'linear', 'min': 0, 'max': 1, 'colors': ["#ff9ef0","#a903fc",]}}}) | |
view.zoomTo() | |
output = view._make_html().replace("'", '"') | |
print(view._make_html()) | |
x = f"""<!DOCTYPE html><html></center> {output} </center></html>""" # do not use ' in this input | |
return f"""<iframe height="500px" width="100%" name="result" allow="midi; geolocation; microphone; camera; | |
display-capture; encrypted-media;" sandbox="allow-modals allow-forms | |
allow-scripts allow-same-origin allow-popups | |
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" | |
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>""" | |
''' | |
return f"""<iframe style="width: 100%; height:700px" name="result" allow="midi; geolocation; microphone; camera; | |
display-capture; encrypted-media;" sandbox="allow-modals allow-forms | |
allow-scripts allow-same-origin allow-popups | |
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" | |
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>""" | |
''' | |
def get_motif_preview(pdb_id, contigs): | |
try: | |
input_pdb = fetch_pdb(pdb_id=pdb_id.lower() if pdb_id else None) | |
if input_pdb is None: | |
return gr.HTML("PDB ID๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์"), None | |
parse = parse_pdb(input_pdb) | |
output_name = input_pdb | |
pdb = open(output_name, "r").read() | |
view = py3Dmol.view(width=500, height=500) | |
view.addModel(pdb, "pdb") | |
if contigs in ['',0]: | |
contigs = ['0'] | |
else: | |
contigs = [contigs] | |
print('DEBUG: ',contigs) | |
pdb_map = get_mappings(ContigMap(parse,contigs)) | |
print('DEBUG: ',pdb_map) | |
print('DEBUG: ',pdb_map['con_ref_idx0']) | |
roi = [x[1]-1 for x in pdb_map['con_ref_pdb_idx']] | |
colormap = {0:'#D3D3D3', 1:'#F74CFF'} | |
colors = {i+1: colormap[1] if i in roi else colormap[0] for i in range(parse['xyz'].shape[0])} | |
view.setStyle({"cartoon": {"colorscheme": {"prop": "resi", "map": colors}}}) | |
view.zoomTo() | |
output = view._make_html().replace("'", '"') | |
print(view._make_html()) | |
x = f"""<!DOCTYPE html><html></center> {output} </center></html>""" # do not use ' in this input | |
return f"""<iframe height="500px" width="100%" name="result" allow="midi; geolocation; microphone; camera; | |
display-capture; encrypted-media;" sandbox="allow-modals allow-forms | |
allow-scripts allow-same-origin allow-popups | |
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen="" | |
allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>""", output_name | |
except Exception as e: | |
return gr.HTML(f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"), None | |
def fetch_pdb(pdb_id=None): | |
if pdb_id is None or pdb_id == "": | |
return None | |
else: | |
os.system(f"wget -qnc https://files.rcsb.org/view/{pdb_id}.pdb") | |
return f"{pdb_id}.pdb" | |
# MSA AND PSSM GUIDANCE | |
def save_pssm(file_upload): | |
filename = file_upload.name | |
orig_name = file_upload.orig_name | |
if filename.split('.')[-1] in ['fasta', 'a3m']: | |
return msa_to_pssm(file_upload) | |
return filename | |
def msa_to_pssm(msa_file): | |
# Define the lookup table for converting amino acids to indices | |
aa_to_index = {'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4, 'Q': 5, 'E': 6, 'G': 7, 'H': 8, 'I': 9, 'L': 10, | |
'K': 11, 'M': 12, 'F': 13, 'P': 14, 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19, 'X': 20, '-': 21} | |
# Open the FASTA file and read the sequences | |
records = list(SeqIO.parse(msa_file.name, "fasta")) | |
assert len(records) >= 1, "MSA must contain more than one protein sequecne." | |
first_seq = str(records[0].seq) | |
aligned_seqs = [first_seq] | |
# print(aligned_seqs) | |
# Perform sequence alignment using the Needleman-Wunsch algorithm | |
aligner = Align.PairwiseAligner() | |
aligner.open_gap_score = -0.7 | |
aligner.extend_gap_score = -0.3 | |
for record in records[1:]: | |
alignment = aligner.align(first_seq, str(record.seq))[0] | |
alignment = alignment.format().split("\n") | |
al1 = alignment[0] | |
al2 = alignment[2] | |
al1_fin = "" | |
al2_fin = "" | |
percent_gap = al2.count('-')/ len(al2) | |
if percent_gap > 0.4: | |
continue | |
for i in range(len(al1)): | |
if al1[i] != '-': | |
al1_fin += al1[i] | |
al2_fin += al2[i] | |
aligned_seqs.append(str(al2_fin)) | |
# Get the length of the aligned sequences | |
aligned_seq_length = len(first_seq) | |
# Initialize the position scoring matrix | |
matrix = np.zeros((22, aligned_seq_length)) | |
# Iterate through the aligned sequences and count the amino acids at each position | |
for seq in aligned_seqs: | |
#print(seq) | |
for i in range(aligned_seq_length): | |
if i == len(seq): | |
break | |
amino_acid = seq[i] | |
if amino_acid.upper() not in aa_to_index.keys(): | |
continue | |
else: | |
aa_index = aa_to_index[amino_acid.upper()] | |
matrix[aa_index, i] += 1 | |
# Normalize the counts to get the frequency of each amino acid at each position | |
matrix /= len(aligned_seqs) | |
print(len(aligned_seqs)) | |
matrix[20:,]=0 | |
outdir = ".".join(msa_file.name.split('.')[:-1]) + ".csv" | |
np.savetxt(outdir, matrix[:21,:].T, delimiter=",") | |
return outdir | |
def get_pssm(fasta_msa, input_pssm): | |
try: | |
if input_pssm is not None: | |
outdir = input_pssm.name | |
elif fasta_msa is not None: | |
outdir = save_pssm(fasta_msa) | |
else: | |
return gr.Plot(label="ํ์ผ์ ์ ๋ก๋ํด์ฃผ์ธ์"), None | |
pssm = np.loadtxt(outdir, delimiter=",", dtype=float) | |
fig, ax = plt.subplots(figsize=(15,6)) | |
plt.imshow(torch.permute(torch.tensor(pssm),(1,0))) | |
return fig, outdir | |
except Exception as e: | |
return gr.Plot(label=f"์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"), None | |
# ํ์ด๋ก ๋ฅ๋ ฅ์น ๊ณ์ฐ ํจ์ ์ถ๊ฐ | |
def calculate_hero_stats(helix_bias, strand_bias, loop_bias, hydrophobic_score): | |
stats = { | |
'strength': strand_bias * 20, # ๋ฒ ํ์ํธ ๊ตฌ์กฐ ๊ธฐ๋ฐ | |
'flexibility': helix_bias * 20, # ์ํํฌ๋ฆญ์ค ๊ตฌ์กฐ ๊ธฐ๋ฐ | |
'speed': loop_bias * 5, # ๋ฃจํ ๊ตฌ์กฐ ๊ธฐ๋ฐ | |
'defense': abs(hydrophobic_score) if hydrophobic_score else 0 | |
} | |
return stats | |
def toggle_seq_input(choice): | |
if choice == "์๋ ์ค๊ณ": | |
return gr.update(visible=True), gr.update(visible=False) | |
else: # "์ง์ ์ ๋ ฅ" | |
return gr.update(visible=False), gr.update(visible=True) | |
def toggle_secondary_structure(choice): | |
if choice == "์ฌ๋ผ์ด๋๋ก ์ค์ ": | |
return ( | |
gr.update(visible=True), # helix_bias | |
gr.update(visible=True), # strand_bias | |
gr.update(visible=True), # loop_bias | |
gr.update(visible=False) # secondary_structure | |
) | |
else: # "์ง์ ์ ๋ ฅ" | |
return ( | |
gr.update(visible=False), # helix_bias | |
gr.update(visible=False), # strand_bias | |
gr.update(visible=False), # loop_bias | |
gr.update(visible=True) # secondary_structure | |
) | |
def create_radar_chart(stats): | |
# ๋ ์ด๋ ์ฐจํธ ์์ฑ ๋ก์ง | |
categories = list(stats.keys()) | |
values = list(stats.values()) | |
fig = go.Figure(data=go.Scatterpolar( | |
r=values, | |
theta=categories, | |
fill='toself' | |
)) | |
fig.update_layout( | |
polar=dict( | |
radialaxis=dict( | |
visible=True, | |
range=[0, 1] | |
)), | |
showlegend=False | |
) | |
return fig | |
def generate_hero_description(name, stats, abilities): | |
# ํ์ด๋ก ์ค๋ช ์์ฑ ๋ก์ง | |
description = f""" | |
ํ์ด๋ก ์ด๋ฆ: {name} | |
์ฃผ์ ๋ฅ๋ ฅ: | |
- ๊ทผ๋ ฅ: {'โ ' * int(stats['strength'] * 5)} | |
- ์ ์ฐ์ฑ: {'โ ' * int(stats['flexibility'] * 5)} | |
- ์คํผ๋: {'โ ' * int(stats['speed'] * 5)} | |
- ๋ฐฉ์ด๋ ฅ: {'โ ' * int(stats['defense'] * 5)} | |
ํน์ ๋ฅ๋ ฅ: {', '.join(abilities)} | |
""" | |
return description | |
def combined_generation(name, strength, flexibility, speed, defense, size, abilities, | |
sequence, seq_len, helix_bias, strand_bias, loop_bias, | |
secondary_structure, aa_bias, aa_bias_potential, | |
num_steps, noise, hydrophobic_target_score, hydrophobic_potential, | |
contigs, pssm, seq_mask, str_mask, rewrite_pdb): | |
try: | |
# protein_diffusion_model ์คํ | |
generator = protein_diffusion_model( | |
sequence=None, | |
seq_len=size, # ํ์ด๋ก ํฌ๊ธฐ๋ฅผ seq_len์ผ๋ก ์ฌ์ฉ | |
helix_bias=flexibility, # ํ์ด๋ก ์ ์ฐ์ฑ์ helix_bias๋ก ์ฌ์ฉ | |
strand_bias=strength, # ํ์ด๋ก ๊ฐ๋๋ฅผ strand_bias๋ก ์ฌ์ฉ | |
loop_bias=speed, # ํ์ด๋ก ์คํผ๋๋ฅผ loop_bias๋ก ์ฌ์ฉ | |
secondary_structure=None, | |
aa_bias=None, | |
aa_bias_potential=None, | |
num_steps="25", | |
noise="normal", | |
hydrophobic_target_score=str(-defense), # ํ์ด๋ก ๋ฐฉ์ด๋ ฅ์ hydrophobic score๋ก ์ฌ์ฉ | |
hydrophobic_potential="2", | |
contigs=None, | |
pssm=None, | |
seq_mask=None, | |
str_mask=None, | |
rewrite_pdb=None | |
) | |
# ๋ง์ง๋ง ๊ฒฐ๊ณผ ๊ฐ์ ธ์ค๊ธฐ | |
final_result = None | |
for result in generator: | |
final_result = result | |
if final_result is None: | |
raise Exception("์์ฑ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค") | |
output_seq, output_pdb, structure_view, plddt_plot = final_result | |
# ํ์ด๋ก ๋ฅ๋ ฅ์น ๊ณ์ฐ | |
stats = calculate_hero_stats(flexibility, strength, speed, defense) | |
# ๋ชจ๋ ๊ฒฐ๊ณผ ๋ฐํ | |
return ( | |
create_radar_chart(stats), # ๋ฅ๋ ฅ์น ์ฐจํธ | |
generate_hero_description(name, stats, abilities), # ํ์ด๋ก ์ค๋ช | |
output_seq, # ๋จ๋ฐฑ์ง ์์ด | |
output_pdb, # PDB ํ์ผ | |
structure_view, # 3D ๊ตฌ์กฐ | |
plddt_plot # ์ ๋ขฐ๋ ์ฐจํธ | |
) | |
except Exception as e: | |
print(f"Error in combined_generation: {str(e)}") | |
return ( | |
None, | |
f"์๋ฌ: {str(e)}", | |
None, | |
None, | |
gr.HTML("์๋ฌ๊ฐ ๋ฐ์ํ์ต๋๋ค"), | |
None | |
) | |
with gr.Blocks(theme='ParityError/Interstellar') as demo: | |
with gr.Row(): | |
# ์ผ์ชฝ ์ด: ์ฑ๋ด ๋ฐ ์ปจํธ๋กค ํจ๋ | |
with gr.Column(scale=1): | |
# ์ฑ๋ด ์ธํฐํ์ด์ค | |
gr.Markdown("# ๐ค AI ๋จ๋ฐฑ์ง ์ค๊ณ ๋์ฐ๋ฏธ") | |
chatbot = gr.Chatbot(height=600) | |
with gr.Accordion("์ฑํ ์ค์ ", open=False): | |
system_message = gr.Textbox( | |
value="๋น์ ์ ๋จ๋ฐฑ์ง ์ค๊ณ๋ฅผ ๋์์ฃผ๋ ์ ๋ฌธ๊ฐ์ ๋๋ค.", | |
label="์์คํ ๋ฉ์์ง" | |
) | |
max_tokens = gr.Slider( | |
minimum=1, | |
maximum=2048, | |
value=512, | |
step=1, | |
label="์ต๋ ํ ํฐ ์" | |
) | |
temperature = gr.Slider( | |
minimum=0.1, | |
maximum=4.0, | |
value=0.7, | |
step=0.1, | |
label="Temperature" | |
) | |
top_p = gr.Slider( | |
minimum=0.1, | |
maximum=1.0, | |
value=0.95, | |
step=0.05, | |
label="Top-P" | |
) | |
# ํญ ์ธํฐํ์ด์ค | |
with gr.Tabs(): | |
with gr.TabItem("๐ฆธโโ๏ธ ํ์ด๋ก ๋์์ธ"): | |
gr.Markdown(""" | |
### โจ ๋น์ ๋ง์ ํน๋ณํ ํ์ด๋ก๋ฅผ ๋ง๋ค์ด๋ณด์ธ์! | |
๊ฐ ๋ฅ๋ ฅ์น๋ฅผ ์กฐ์ ํ๋ฉด ํ์ด๋ก์ DNA๊ฐ ์๋์ผ๋ก ์ค๊ณ๋ฉ๋๋ค. | |
""") | |
# ํ์ด๋ก ๊ธฐ๋ณธ ์ ๋ณด | |
hero_name = gr.Textbox( | |
label="ํ์ด๋ก ์ด๋ฆ", | |
placeholder="๋น์ ์ ํ์ด๋ก ์ด๋ฆ์ ์ง์ด์ฃผ์ธ์!", | |
info="ํ์ด๋ก์ ์ ์ฒด์ฑ์ ๋ํ๋ด๋ ์ด๋ฆ์ ์ ๋ ฅํ์ธ์" | |
) | |
# ๋ฅ๋ ฅ์น ์ค์ | |
gr.Markdown("### ๐ช ํ์ด๋ก ๋ฅ๋ ฅ์น ์ค์ ") | |
with gr.Row(): | |
strength = gr.Slider( | |
minimum=0.0, maximum=0.05, | |
label="๐ช ์ด๊ฐ๋ ฅ(๊ทผ๋ ฅ)", | |
value=0.02, | |
info="๋จ๋จํ ๋ฒ ํ์ํธ ๊ตฌ์กฐ๋ก ๊ฐ๋ ฅํ ํ์ ์์ฑํฉ๋๋ค" | |
) | |
flexibility = gr.Slider( | |
minimum=0.0, maximum=0.05, | |
label="๐คธโโ๏ธ ์ ์ฐ์ฑ", | |
value=0.02, | |
info="๋์ ํ ์ํํฌ๋ฆญ์ค ๊ตฌ์กฐ๋ก ์ ์ฐํ ์์ง์์ ๊ฐ๋ฅํ๊ฒ ํฉ๋๋ค" | |
) | |
with gr.Row(): | |
speed = gr.Slider( | |
minimum=0.0, maximum=0.20, | |
label="โก ์คํผ๋", | |
value=0.1, | |
info="๋ฃจํ ๊ตฌ์กฐ๋ก ๋น ๋ฅธ ์์ง์์ ๊ตฌํํฉ๋๋ค" | |
) | |
defense = gr.Slider( | |
minimum=-10, maximum=10, | |
label="๐ก๏ธ ๋ฐฉ์ด๋ ฅ", | |
value=0, | |
info="์์: ์์ค ํ๋์ ํนํ, ์์: ์ง์ ํ๋์ ํนํ" | |
) | |
# ํ์ด๋ก ํฌ๊ธฐ ์ค์ | |
hero_size = gr.Slider( | |
minimum=50, maximum=200, | |
label="๐ ํ์ด๋ก ํฌ๊ธฐ", | |
value=100, | |
info="ํ์ด๋ก์ ์ ์ฒด์ ์ธ ํฌ๊ธฐ๋ฅผ ๊ฒฐ์ ํฉ๋๋ค" | |
) | |
# ํน์ ๋ฅ๋ ฅ ์ค์ | |
with gr.Accordion("๐ ํน์ ๋ฅ๋ ฅ", open=False): | |
gr.Markdown(""" | |
ํน์ ๋ฅ๋ ฅ์ ์ ํํ๋ฉด ํ์ด๋ก์ DNA์ ํน๋ณํ ๊ตฌ์กฐ๊ฐ ์ถ๊ฐ๋ฉ๋๋ค. | |
- ์๊ฐ ํ๋ณต: ๋จ๋ฐฑ์ง ๊ตฌ์กฐ ๋ณต๊ตฌ ๋ฅ๋ ฅ ๊ฐํ | |
- ์๊ฑฐ๋ฆฌ ๊ณต๊ฒฉ: ํน์ํ ๊ตฌ์กฐ์ ๋์ถ๋ถ ํ์ฑ | |
- ๋ฐฉ์ด๋ง ์์ฑ: ์์ ์ ์ธ ๋ณดํธ์ธต ๊ตฌ์กฐ ์์ฑ | |
""") | |
special_ability = gr.CheckboxGroup( | |
choices=["์๊ฐ ํ๋ณต", "์๊ฑฐ๋ฆฌ ๊ณต๊ฒฉ", "๋ฐฉ์ด๋ง ์์ฑ"], | |
label="ํน์ ๋ฅ๋ ฅ ์ ํ" | |
) | |
# ์์ฑ ๋ฒํผ | |
create_btn = gr.Button("๐งฌ ํ์ด๋ก ์์ฑ!", variant="primary", scale=2) | |
with gr.TabItem("๐งฌ ํ์ด๋ก DNA ์ค๊ณ"): | |
gr.Markdown(""" | |
### ๐งช ํ์ด๋ก DNA ๊ณ ๊ธ ์ค์ | |
ํ์ด๋ก์ ์ ์ ์ ๊ตฌ์กฐ๋ฅผ ๋ ์ธ๋ฐํ๊ฒ ์กฐ์ ํ ์ ์์ต๋๋ค. | |
""") | |
seq_opt = gr.Radio( | |
["์๋ ์ค๊ณ", "์ง์ ์ ๋ ฅ"], | |
label="DNA ์ค๊ณ ๋ฐฉ์", | |
value="์๋ ์ค๊ณ" | |
) | |
sequence = gr.Textbox( | |
label="DNA ์ํ์ค", | |
lines=1, | |
placeholder='์ฌ์ฉ ๊ฐ๋ฅํ ์๋ฏธ๋ ธ์ฐ: A,C,D,E,F,G,H,I,K,L,M,N,P,Q,R,S,T,V,W,Y (X๋ ๋ฌด์์)', | |
visible=False | |
) | |
seq_len = gr.Slider( | |
minimum=5.0, maximum=250.0, | |
label="DNA ๊ธธ์ด", | |
value=100, | |
visible=True | |
) | |
with gr.Accordion(label='๐ฆด ๊ณจ๊ฒฉ ๊ตฌ์กฐ ์ค์ ', open=True): | |
gr.Markdown(""" | |
ํ์ด๋ก์ ๊ธฐ๋ณธ ๊ณจ๊ฒฉ ๊ตฌ์กฐ๋ฅผ ์ค์ ํฉ๋๋ค. | |
- ๋์ ํ ๊ตฌ์กฐ: ์ ์ฐํ๊ณ ํ๋ ฅ์๋ ์์ง์ | |
- ๋ณํํ ๊ตฌ์กฐ: ๋จ๋จํ๊ณ ๊ฐ๋ ฅํ ํ | |
- ๊ณ ๋ฆฌํ ๊ตฌ์กฐ: ๋น ๋ฅด๊ณ ๋ฏผ์ฒฉํ ์์ง์ | |
""") | |
sec_str_opt = gr.Radio( | |
["์ฌ๋ผ์ด๋๋ก ์ค์ ", "์ง์ ์ ๋ ฅ"], | |
label="๊ณจ๊ฒฉ ๊ตฌ์กฐ ์ค์ ๋ฐฉ์", | |
value="์ฌ๋ผ์ด๋๋ก ์ค์ " | |
) | |
secondary_structure = gr.Textbox( | |
label="๊ณจ๊ฒฉ ๊ตฌ์กฐ", | |
lines=1, | |
placeholder='H:๋์ ํ, S:๋ณํํ, L:๊ณ ๋ฆฌํ, X:์๋์ค์ ', | |
visible=False | |
) | |
with gr.Column(): | |
helix_bias = gr.Slider( | |
minimum=0.0, maximum=0.05, | |
label="๋์ ํ ๊ตฌ์กฐ ๋น์จ", | |
visible=True | |
) | |
strand_bias = gr.Slider( | |
minimum=0.0, maximum=0.05, | |
label="๋ณํํ ๊ตฌ์กฐ ๋น์จ", | |
visible=True | |
) | |
loop_bias = gr.Slider( | |
minimum=0.0, maximum=0.20, | |
label="๊ณ ๋ฆฌํ ๊ตฌ์กฐ ๋น์จ", | |
visible=True | |
) | |
with gr.Accordion(label='๐งฌ DNA ๊ตฌ์ฑ ์ค์ ', open=False): | |
gr.Markdown(""" | |
ํน์ ์๋ฏธ๋ ธ์ฐ์ ๋น์จ์ ์กฐ์ ํ์ฌ ํ์ด๋ก์ ํน์ฑ์ ๊ฐํํ ์ ์์ต๋๋ค. | |
์์: W0.2,E0.1 (ํธ๋ฆฝํ ํ 20%, ๊ธ๋ฃจํ์ฐ 10%) | |
""") | |
with gr.Row(): | |
aa_bias = gr.Textbox( | |
label="์๋ฏธ๋ ธ์ฐ ๋น์จ", | |
lines=1, | |
placeholder='์์: W0.2,E0.1' | |
) | |
aa_bias_potential = gr.Textbox( | |
label="๊ฐํ ์ ๋", | |
lines=1, | |
placeholder='1.0-5.0 ์ฌ์ด ๊ฐ ์ ๋ ฅ' | |
) | |
with gr.Accordion(label='๐ ํ๊ฒฝ ์ ์๋ ฅ ์ค์ ', open=False): | |
gr.Markdown(""" | |
ํ์ด๋ก์ ํ๊ฒฝ ์ ์๋ ฅ์ ์กฐ์ ํฉ๋๋ค. | |
์์: ์์ค ํ๋์ ํนํ, ์์: ์ง์ ํ๋์ ํนํ | |
""") | |
with gr.Row(): | |
hydrophobic_target_score = gr.Textbox( | |
label="ํ๊ฒฝ ์ ์ ์ ์", | |
lines=1, | |
placeholder='์์: -5 (์์ค ํ๋์ ํนํ)' | |
) | |
hydrophobic_potential = gr.Textbox( | |
label="์ ์๋ ฅ ๊ฐํ ์ ๋", | |
lines=1, | |
placeholder='1.0-2.0 ์ฌ์ด ๊ฐ ์ ๋ ฅ' | |
) | |
with gr.Accordion(label='โ๏ธ ๊ณ ๊ธ ์ค์ ', open=False): | |
gr.Markdown(""" | |
DNA ์์ฑ ๊ณผ์ ์ ์ธ๋ถ ๋งค๊ฐ๋ณ์๋ฅผ ์กฐ์ ํฉ๋๋ค. | |
""") | |
with gr.Row(): | |
num_steps = gr.Textbox( | |
label="์์ฑ ๋จ๊ณ", | |
lines=1, | |
placeholder='25 ์ดํ ๊ถ์ฅ' | |
) | |
noise = gr.Dropdown( | |
['normal','gmm2 [-1,1]','gmm3 [-1,0,1]'], | |
label='๋ ธ์ด์ฆ ํ์ ', | |
value='normal' | |
) | |
design_btn = gr.Button("๐งฌ DNA ์ค๊ณ ์์ฑ!", variant="primary", scale=2) | |
with gr.TabItem("๐งช ํ์ด๋ก ์ ์ ์ ๊ฐํ"): | |
gr.Markdown(""" | |
### โก ๊ธฐ์กด ํ์ด๋ก์ DNA ํ์ฉ | |
๊ฐ๋ ฅํ ํ์ด๋ก์ DNA ์ผ๋ถ๋ฅผ ์๋ก์ด ํ์ด๋ก์๊ฒ ์ด์ํฉ๋๋ค. | |
""") | |
gr.Markdown("๊ณต๊ฐ๋ ํ์ด๋ก DNA ๋ฐ์ดํฐ๋ฒ ์ด์ค์์ ์ฝ๋๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค") | |
pdb_id_code = gr.Textbox( | |
label="ํ์ด๋ก DNA ์ฝ๋", | |
lines=1, | |
placeholder='๊ธฐ์กด ํ์ด๋ก์ DNA ์ฝ๋๋ฅผ ์ ๋ ฅํ์ธ์ (์: 1DPX)' | |
) | |
gr.Markdown("์ด์ํ๊ณ ์ถ์ DNA ์์ญ์ ์ ํํ๊ณ ์๋ก์ด DNA๋ฅผ ์ถ๊ฐํ ์ ์์ต๋๋ค") | |
contigs = gr.Textbox( | |
label="์ด์ํ DNA ์์ญ", | |
lines=1, | |
placeholder='์์: 15,A3-10,20-30' | |
) | |
with gr.Row(): | |
seq_mask = gr.Textbox( | |
label='๋ฅ๋ ฅ ์ฌ์ค๊ณ', | |
lines=1, | |
placeholder='์ ํํ ์์ญ์ ๋ฅ๋ ฅ์ ์๋กญ๊ฒ ๋์์ธ' | |
) | |
str_mask = gr.Textbox( | |
label='๊ตฌ์กฐ ์ฌ์ค๊ณ', | |
lines=1, | |
placeholder='์ ํํ ์์ญ์ ๊ตฌ์กฐ๋ฅผ ์๋กญ๊ฒ ๋์์ธ' | |
) | |
preview_viewer = gr.HTML() | |
rewrite_pdb = gr.File(label='ํ์ด๋ก DNA ํ์ผ') | |
preview_btn = gr.Button("๐ ๋ฏธ๋ฆฌ๋ณด๊ธฐ", variant="secondary") | |
enhance_btn = gr.Button("โก ๊ฐํ๋ ํ์ด๋ก ์์ฑ!", variant="primary", scale=2) | |
with gr.TabItem("๐ ํ์ด๋ก ๊ฐ๋ฌธ"): | |
gr.Markdown(""" | |
### ๐ฐ ์๋ํ ํ์ด๋ก ๊ฐ๋ฌธ์ ์ ์ฐ | |
๊ฐ๋ ฅํ ํ์ด๋ก ๊ฐ๋ฌธ์ ํน์ฑ์ ๊ณ์นํ์ฌ ์๋ก์ด ํ์ด๋ก๋ฅผ ๋ง๋ญ๋๋ค. | |
""") | |
with gr.Row(): | |
with gr.Column(): | |
gr.Markdown("ํ์ด๋ก ๊ฐ๋ฌธ์ DNA ์ ๋ณด๊ฐ ๋ด๊ธด ํ์ผ์ ์ ๋ก๋ํ์ธ์") | |
fasta_msa = gr.File(label='๊ฐ๋ฌธ DNA ๋ฐ์ดํฐ') | |
with gr.Column(): | |
gr.Markdown("์ด๋ฏธ ๋ถ์๋ ๊ฐ๋ฌธ ํน์ฑ ๋ฐ์ดํฐ๊ฐ ์๋ค๋ฉด ์ ๋ก๋ํ์ธ์") | |
input_pssm = gr.File(label='๊ฐ๋ฌธ ํน์ฑ ๋ฐ์ดํฐ') | |
pssm = gr.File(label='๋ถ์๋ ๊ฐ๋ฌธ ํน์ฑ') | |
pssm_view = gr.Plot(label='๊ฐ๋ฌธ ํน์ฑ ๋ถ์ ๊ฒฐ๊ณผ') | |
pssm_gen_btn = gr.Button("โจ ๊ฐ๋ฌธ ํน์ฑ ๋ถ์", variant="secondary") | |
inherit_btn = gr.Button("๐ ๊ฐ๋ฌธ์ ํ ๊ณ์น!", variant="primary", scale=2) | |
# ์ค๋ฅธ์ชฝ ์ด: ๊ฒฐ๊ณผ ํ์ | |
with gr.Column(scale=1): | |
gr.Markdown("## ๐ฆธโโ๏ธ ํ์ด๋ก ํ๋กํ") | |
hero_stats = gr.Plot(label="๋ฅ๋ ฅ์น ๋ถ์") | |
hero_description = gr.Textbox(label="ํ์ด๋ก ํน์ฑ", lines=3) | |
gr.Markdown("## ๐งฌ ํ์ด๋ก DNA ๋ถ์ ๊ฒฐ๊ณผ") | |
gr.Markdown("#### โก DNA ์์ ์ฑ ์ ์") | |
plddt_plot = gr.Plot(label='์์ ์ฑ ๋ถ์') | |
gr.Markdown("#### ๐ DNA ์ํ์ค") | |
output_seq = gr.Textbox(label="DNA ์์ด") | |
gr.Markdown("#### ๐พ DNA ๋ฐ์ดํฐ") | |
output_pdb = gr.File(label="DNA ํ์ผ") | |
gr.Markdown("#### ๐ฌ DNA ๊ตฌ์กฐ") | |
output_viewer = gr.HTML() | |
# ์ด๋ฒคํธ ์ฐ๊ฒฐ | |
# ์ฑ๋ด ์ด๋ฒคํธ | |
msg.submit(process_chat, [msg, chatbot], [chatbot]) | |
clear.click(lambda: None, None, chatbot, queue=False) | |
# UI ์ปจํธ๋กค ์ด๋ฒคํธ | |
seq_opt.change( | |
fn=toggle_seq_input, | |
inputs=[seq_opt], | |
outputs=[seq_len, sequence], | |
queue=False | |
) | |
sec_str_opt.change( | |
fn=toggle_secondary_structure, | |
inputs=[sec_str_opt], | |
outputs=[helix_bias, strand_bias, loop_bias, secondary_structure], | |
queue=False | |
) | |
preview_btn.click( | |
get_motif_preview, | |
inputs=[pdb_id_code, contigs], | |
outputs=[preview_viewer, rewrite_pdb] | |
) | |
pssm_gen_btn.click( | |
get_pssm, | |
inputs=[fasta_msa, input_pssm], | |
outputs=[pssm_view, pssm] | |
) | |
# ์ฑ๋ด ๊ธฐ๋ฐ ๋จ๋ฐฑ์ง ์์ฑ ๊ฒฐ๊ณผ ์ ๋ฐ์ดํธ | |
def update_protein_display(chat_response): | |
if "์์ฑ๋ ๋จ๋ฐฑ์ง ๋ถ์" in chat_response: | |
params = extract_parameters_from_chat(chat_response) | |
result = generate_protein(params) | |
return { | |
hero_stats: create_radar_chart(calculate_hero_stats(params)), | |
hero_description: chat_response, | |
output_seq: result[0], | |
output_pdb: result[1], | |
output_viewer: display_pdb(result[1]), | |
plddt_plot: result[3] | |
} | |
return None | |
# ๊ฐ ์์ฑ ๋ฒํผ ์ด๋ฒคํธ ์ฐ๊ฒฐ | |
for btn in [create_btn, design_btn, enhance_btn, inherit_btn]: | |
btn.click( | |
combined_generation, | |
inputs=[ | |
hero_name, strength, flexibility, speed, defense, hero_size, special_ability, | |
sequence, seq_len, helix_bias, strand_bias, loop_bias, | |
secondary_structure, aa_bias, aa_bias_potential, | |
num_steps, noise, hydrophobic_target_score, hydrophobic_potential, | |
contigs, pssm, seq_mask, str_mask, rewrite_pdb | |
], | |
outputs=[ | |
hero_stats, | |
hero_description, | |
output_seq, | |
output_pdb, | |
output_viewer, | |
plddt_plot | |
] | |
) | |
# ์ฑ๋ด ์๋ต์ ๋ฐ๋ฅธ ๊ฒฐ๊ณผ ์ ๋ฐ์ดํธ | |
msg.submit( | |
update_protein_display, | |
inputs=[chatbot], | |
outputs=[hero_stats, hero_description, output_seq, output_pdb, output_viewer, plddt_plot] | |
) | |
chat_interface = gr.ChatInterface( | |
respond, | |
additional_inputs=[ | |
system_message, | |
max_tokens, | |
temperature, | |
top_p, | |
], | |
chatbot=chatbot, | |
) | |
# ์คํ | |
demo.queue() | |
demo.launch(debug=True) |