# Spaces: Running  (appears to be hosting-page status text captured with the source; not part of the app)
import streamlit as st | |
from transformers import GPT2LMHeadModel, GPT2Tokenizer | |
import torch | |
# Page chrome: browser-tab metadata, then the visible title and tagline.
# The page config call is kept as the first Streamlit call in the script.
st.set_page_config(
    page_icon="🧬",
    page_title="NeoProtein Designer",
)
st.title("🧬 NeoProtein-GPT Protein Designer")
st.markdown(
    "\n"
    "### Design novel protein sequences with unique binding sites\n"
    "*Using the [NeoProtein-GPT](https://huggingface.co/ayyuce/NeoProtein-GPT) model*\n"
)
# Sidebar controls. The four names assigned here are module-level values that
# generate_sequences() reads each time the script runs.
with st.sidebar:
    st.header("Parameters")

    # Free-text motif; "X" marks a wildcard position (see the help tooltip).
    binding_motif = st.text_input(
        "Binding site motif (e.g., AXXC):",
        help="Use X for wildcard positions",
    )

    # Sliders are given as (min, max, default).
    seq_length = st.slider(
        "Sequence length", min_value=50, max_value=500, value=150
    )
    temperature = st.slider(
        "Temperature (creativity)", min_value=0.1, max_value=2.0, value=1.0
    )
    num_sequences = st.slider(
        "Number of sequences", min_value=1, max_value=5, value=3
    )
@st.cache_resource
def load_model():
    """Load the NeoProtein-GPT model and tokenizer from the Hugging Face Hub.

    Streamlit re-executes this script on every widget interaction, so the
    loader is wrapped in ``st.cache_resource``: the model is built once per
    server process and the same objects are handed back on later reruns.

    The previous ``force_download=True`` bypassed the local Hub cache and
    re-fetched the weights on every call; it has been dropped together with
    the deprecated ``resume_download`` flag, ``local_files_only=False`` (the
    default) and ``trust_remote_code=True`` (not needed when a concrete
    ``GPT2LMHeadModel`` class is used instead of an ``Auto*`` class).

    Returns:
        tuple: ``(model, tokenizer)`` - a ``GPT2LMHeadModel`` and the
        ``GPT2Tokenizer`` loaded from the same repository.
    """
    repo_id = "ayyuce/NeoProtein-GPT"
    model = GPT2LMHeadModel.from_pretrained(repo_id)
    tokenizer = GPT2Tokenizer.from_pretrained(repo_id)
    return model, tokenizer


# Cached across reruns, so this is cheap after the first execution.
model, tokenizer = load_model()
def generate_sequences():
    """Sample protein sequences conditioned on the sidebar binding motif.

    Reads the module-level widget values ``binding_motif``, ``seq_length``,
    ``temperature`` and ``num_sequences`` together with the loaded ``model``
    and ``tokenizer``.

    Returns:
        list[str]: The decoded continuations (prompt tokens removed), one per
        requested sequence. An empty list is returned when the motif is blank
        or when generation raises; in both cases an ``st.error`` message is
        shown to the user.
    """
    # Treat whitespace-only input the same as no input.
    motif = binding_motif.strip()
    if not motif:
        st.error("Please enter a binding motif")
        return []  # always a list, matching the failure path below

    prompt = f"<start>BindingMotif:{motif}<start>Seq:"
    try:
        inputs = tokenizer(prompt, return_tensors="pt")
        input_length = inputs.input_ids.shape[1]
        outputs = model.generate(
            inputs.input_ids,
            # Passed explicitly: with pad_token_id == eos_token_id the mask
            # cannot be inferred reliably from the input ids.
            attention_mask=inputs.attention_mask,
            max_length=input_length + seq_length,
            temperature=temperature,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            num_return_sequences=num_sequences,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Every returned row starts with the prompt tokens; keep only the
        # newly generated part.
        return [
            tokenizer.decode(output[input_length:], skip_special_tokens=True)
            for output in outputs
        ]
    except Exception as e:
        # UI boundary: surface the failure in the page instead of crashing
        # the script run.
        st.error(f"Generation failed: {str(e)}")
        return []
# Generate and render only on a script run where the button was pressed.
if st.button("Generate Protein Sequences"):
    with st.spinner("Designing novel proteins..."):
        sequences = generate_sequences()

    # A falsy result means generate_sequences() already reported the problem.
    if sequences:
        st.subheader("Generated Sequences")
        for number, seq in enumerate(sequences, start=1):
            # One fenced "fasta" code block per sequence, numbered from 1.
            st.markdown(
                "\n"
                f"**Sequence #{number}**\n"
                "```fasta\n"
                f"{seq}\n"
                "```\n"
            )
# Static usage notes shown at the bottom of the page.
# Blank lines separate the heading, the numbered steps, the "Example motifs"
# label and the bullet list so Markdown renders them as distinct blocks;
# without them the label is absorbed into step 4 as a continuation line.
st.markdown("""
### How to use:

1. Enter your target binding motif using single-letter amino acid codes
2. Adjust parameters in the sidebar
3. Click the generate button
4. Results will appear in FASTA format

**Example motifs:**

- `GHXXXH` for histidine-rich motifs
- `CXXC` for disulfide bond motifs
- `DE` for acidic patches
""")