import gradio as gr from transformers import AutoModelForSequenceClassification, AutoTokenizer import torch.nn.functional as F placeholder = 'GATGCTACTGCTAGCTAATCAGTAATCACCAATGCATAAACACAACACATGCCTTCGTTCCAAAGTTTTCATTCCTCGTCATAGACTTAAAGAAGGGGCAACAAGTTCTCTACGAGTCTTCTGGACTGGACTGGCTACCCCCTCGGCCCATTCTGGCCCAGTTGCGGGCGGCCTTTCATTTAATAAATATTTCTAATAGATATAAATTATTTTATCTAATATTATTAATTTTTTTCTTATAAAACATATAAT' model_names = ['plant-dnabert', 'plant-dnagpt', 'plant-nucleotide-transformer', 'plant-dnagemma', 'dnabert2', 'nucleotide-transformer-v2-100m', 'agront-1b'] tokenizer_type = "singlebase" model_names = [x + '-' + tokenizer_type if x.startswith("plant") else x for x in model_names] task_map = { "promoter": ["Not promoter", "Core promoter"], "conservation": ["Not conserved", "Conserved"], "H3K27ac": ["Not H3K27ac", "H3K27ac"], "H3K27me3": ["Not H3K27me3", "H3K27me3"], "H3K4me3": ["Not H3K4me3", "H3K4me3"], "lncRNAs": ["Not lncRNA", "lncRNA"], "open_chromatin": ['Not open chromatin', 'Full open chromatin', 'Partial open chromatin'], } task_lists = task_map.keys() def inference(seq,model,task): if not seq: gr.Warning("No sequence provided, use the default sequence.") seq = placeholder # Load model and tokenizer model_name = f'zhangtaolab/{model}-{task}' model = AutoModelForSequenceClassification.from_pretrained(model_name,ignore_mismatched_sizes=True) tokenizer = AutoTokenizer.from_pretrained(model_name) # Inference inputs = tokenizer(seq, return_tensors='pt', padding=True, truncation=True, max_length=512) outputs = model(**inputs) probabilities = F.softmax(outputs.logits,dim=-1).tolist()[0] #Map probabilities to labels labels = task_map[task] result = {labels[i]: probabilities[i] for i in range(len(labels))} return result # Create Gradio interface with gr.Blocks() as demo: gr.HTML( """