NeoProtein-GPT / app.py
ayyuce's picture
Update app.py
9d0e799 verified
import streamlit as st
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
# --- Page chrome: browser-tab metadata, main title and a short intro. ---
st.set_page_config(
    page_title="NeoProtein Designer",
    page_icon="🧬",
)
st.title("🧬 NeoProtein-GPT Protein Designer")
st.markdown("""
### Design novel protein sequences with unique binding sites
*Using the [NeoProtein-GPT](https://huggingface.co/ayyuce/NeoProtein-GPT) model*
""")

# --- Sidebar: generation parameters, kept as module-level names because
# generate_sequences() reads them directly. ---
with st.sidebar:
    st.header("Parameters")
    binding_motif = st.text_input(
        label="Binding site motif (e.g., AXXC):",
        help="Use X for wildcard positions",
    )
    seq_length = st.slider(
        label="Sequence length",
        min_value=50,
        max_value=500,
        value=150,
    )
    temperature = st.slider(
        label="Temperature (creativity)",
        min_value=0.1,
        max_value=2.0,
        value=1.0,
    )
    num_sequences = st.slider(
        label="Number of sequences",
        min_value=1,
        max_value=5,
        value=3,
    )
@st.cache_resource(show_spinner=False)
def load_model():
    """Load the NeoProtein-GPT model and its tokenizer from the Hugging Face Hub.

    The function is wrapped in ``st.cache_resource`` so that the returned
    objects can be reused instead of being rebuilt on each call.

    Returns:
        tuple: ``(model, tokenizer)`` -- a ``GPT2LMHeadModel`` and a
        ``GPT2Tokenizer`` loaded from the same repository.
    """
    repo_id = "ayyuce/NeoProtein-GPT"

    # Use the library's default download behaviour:
    #   * no forced re-download, so files already in the local Hub cache
    #     are reused instead of being fetched again;
    #   * no ``resume_download`` (deprecated upstream) and no
    #     ``local_files_only=False`` (that is already the default);
    #   * no ``trust_remote_code``: GPT2LMHeadModel is a class shipped with
    #     transformers, so no code from the repository has to be executed.
    model = GPT2LMHeadModel.from_pretrained(repo_id)
    tokenizer = GPT2Tokenizer.from_pretrained(repo_id)
    return model, tokenizer


# Module-level handles used by generate_sequences().
model, tokenizer = load_model()
def generate_sequences():
    """Sample protein sequences conditioned on the sidebar binding motif.

    Reads the module-level widget values ``binding_motif``, ``seq_length``,
    ``temperature`` and ``num_sequences`` together with the loaded ``model``
    and ``tokenizer``. Problems are reported to the user through
    ``st.error`` instead of being raised.

    Returns:
        list[str]: One decoded continuation per requested sequence, with the
        prompt tokens removed. Empty when no motif was entered or when
        generation failed, so callers can always iterate the result.
    """
    # Treat a whitespace-only entry the same as an empty one and keep stray
    # spaces out of the prompt.
    motif = binding_motif.strip() if binding_motif else ""
    if not motif:
        st.error("Please enter a binding motif")
        return []

    prompt = f"<start>BindingMotif:{motif}<start>Seq:"
    try:
        inputs = tokenizer(prompt, return_tensors="pt")
        input_length = inputs.input_ids.shape[1]
        outputs = model.generate(
            inputs.input_ids,
            # Pass the mask explicitly: the pad id below is the EOS id, so
            # padding cannot be told apart from real tokens by id alone.
            attention_mask=inputs.attention_mask,
            # max_length counts the prompt, hence the offset.
            max_length=input_length + seq_length,
            temperature=temperature,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            num_return_sequences=num_sequences,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Drop the prompt tokens so only the newly generated part is shown.
        return [
            tokenizer.decode(output[input_length:], skip_special_tokens=True)
            for output in outputs
        ]
    except Exception as e:  # UI boundary: show the failure instead of crashing the app
        st.error(f"Generation failed: {e}")
        return []
# Generation runs only when the button reports a click.
if st.button("Generate Protein Sequences"):
    with st.spinner("Designing novel proteins..."):
        sequences = generate_sequences()

    # A missing motif or a failed run gives a falsy result; render nothing then.
    if sequences:
        st.subheader("Generated Sequences")
        for number, seq in enumerate(sequences, start=1):
            # The literal stays flush-left so the rendered markdown is unchanged.
            st.markdown(f"""
**Sequence #{number}**
```fasta
{seq}
```
""")
# Static usage guide rendered after the generate button.
usage_guide = """
### How to use:
1. Enter your target binding motif using single-letter amino acid codes
2. Adjust parameters in the sidebar
3. Click the generate button
4. Results will appear in FASTA format
**Example motifs:**
- `GHXXXH` for histidine-rich motifs
- `CXXC` for disulfide bond motifs
- `DE` for acidic patches
"""
st.markdown(usage_guide)