"""Six-frame translation of a DNA sequence, served through a Gradio text UI.

The DNA is transcribed to RNA (T -> U), split into the three forward reading
frames and the three reading frames of the reverse complement, and every
frame is translated to one-letter amino-acid codes.  Open reading frames
(AUG ... stop codon) are wrapped in ``<`` and ``>``.
"""

# Watson-Crick pairing on the RNA alphabet, used to build the reverse strand.
base_pairing = {'A': 'U', 'U': 'A', 'C': 'G', 'G': 'C'}

#######################################################################################
# dictionary to translate three-letter mRNA codon to three-letter amino acid
#######################################################################################
translate = {
    'UUU': 'Phe', 'UCU': 'Ser', 'UAU': 'Tyr', 'UGU': 'Cys',
    'UUC': 'Phe', 'UCC': 'Ser', 'UAC': 'Tyr', 'UGC': 'Cys',
    'UUA': 'Leu', 'UCA': 'Ser', 'UAA': 'Stop', 'UGA': 'Stop',
    'UUG': 'Leu', 'UCG': 'Ser', 'UAG': 'Stop', 'UGG': 'Trp',
    'CUU': 'Leu', 'CCU': 'Pro', 'CAU': 'His', 'CGU': 'Arg',
    'CUC': 'Leu', 'CCC': 'Pro', 'CAC': 'His', 'CGC': 'Arg',
    'CUA': 'Leu', 'CCA': 'Pro', 'CAA': 'Gln', 'CGA': 'Arg',
    'CUG': 'Leu', 'CCG': 'Pro', 'CAG': 'Gln', 'CGG': 'Arg',
    'AUU': 'Ile', 'ACU': 'Thr', 'AAU': 'Asn', 'AGU': 'Ser',
    'AUC': 'Ile', 'ACC': 'Thr', 'AAC': 'Asn', 'AGC': 'Ser',
    'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg',
    'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg',
    'GUU': 'Val', 'GCU': 'Ala', 'GAU': 'Asp', 'GGU': 'Gly',
    'GUC': 'Val', 'GCC': 'Ala', 'GAC': 'Asp', 'GGC': 'Gly',
    'GUA': 'Val', 'GCA': 'Ala', 'GAA': 'Glu', 'GGA': 'Gly',
    'GUG': 'Val', 'GCG': 'Ala', 'GAG': 'Glu', 'GGG': 'Gly',
}

#######################################################################################
# dictionary to map three-letter amino acid to one-letter amino acid
#######################################################################################
AA_mapping = {
    'Ala': 'A', 'Arg': 'R', 'Asn': 'N', 'Asp': 'D',
    'Cys': 'C', 'Gln': 'Q', 'Glu': 'E', 'Gly': 'G',
    'His': 'H', 'Ile': 'I', 'Leu': 'L', 'Lys': 'K',
    'Met': 'M', 'Phe': 'F', 'Pro': 'P', 'Ser': 'S',
    'Thr': 'T', 'Trp': 'W', 'Tyr': 'Y', 'Val': 'V',
}

_START_CODON = 'AUG'
# Derived from the codon table so the two can never disagree.
_STOP_CODONS = frozenset(
    codon for codon, residue in translate.items() if residue == 'Stop'
)
# Single-pass complement table built from ``base_pairing``.
_COMPLEMENT_TABLE = str.maketrans(base_pairing)

_REPORT_HEADER = " * means the ORF sequence exists and highlighted in as <>; "
_FRAME_LABELS = (
    "5'to 3' Frame 1:",
    "5'to 3' Frame 2:",
    "5'to 3' Frame 3:",
    "3'to 5' Frame 1:",
    "3'to 5' Frame 2:",
    "3'to 5' Frame 3:",
)


def _split_codons(rna, offset):
    """Return the complete three-base codons of *rna* starting at *offset*."""
    # Stopping at len(rna) - 2 drops a trailing partial codon.
    return [rna[i:i + 3] for i in range(offset, len(rna) - 2, 3)]


def _translate_frame(codons):
    """Translate one reading frame to one-letter codes with ORF markers.

    ``<`` is emitted before the Met that opens an ORF and ``>`` where a stop
    codon closes it.  Stop codons themselves produce no letter.  An ORF that
    is still open when the frame ends is closed with ``>``.  The result is
    prefixed with ``*`` when the frame contains at least one ORF start.
    """
    pieces = []
    in_orf = False
    found_orf = False
    for codon in codons:
        if codon in _STOP_CODONS:
            if in_orf:
                pieces.append('>')
                in_orf = False
            continue
        if codon == _START_CODON and not in_orf:
            pieces.append('<')
            in_orf = True
            found_orf = True
        pieces.append(AA_mapping[translate[codon]])
    # Close only an ORF that is really still open; closing unconditionally
    # produced a stray '>' after frames whose ORF already hit a stop codon.
    if in_orf:
        pieces.append('>')
    protein = ''.join(pieces)
    return '*' + protein if found_orf else protein


def transcribe(seq: str) -> list:
    """Transcribe DNA to RNA and split it into six reading frames.

    Args:
        seq: DNA (or RNA) sequence.  Letter case and any whitespace,
            including line breaks from pasted text, are ignored.

    Returns:
        A list of six codon lists: forward-strand offsets 0, 1, 2 followed
        by reverse-complement offsets 0, 1, 2.  Incomplete trailing codons
        are dropped.

    Raises:
        ValueError: if the sequence contains a character other than
            A, C, G, T or U after normalisation.
    """
    rna = ''.join(seq.split()).upper().replace('T', 'U')
    unknown = set(rna) - set(base_pairing)
    if unknown:
        raise ValueError(
            'Sequence contains invalid base(s): ' + ', '.join(sorted(unknown))
        )

    reverse_complement = rna[::-1].translate(_COMPLEMENT_TABLE)
    return [
        _split_codons(strand, offset)
        for strand in (rna, reverse_complement)
        for offset in range(3)
    ]


def get_protein_from_DNA(DNA: str) -> str:
    """Return a text report of the six-frame translation of *DNA*.

    Args:
        DNA: the input sequence; see :func:`transcribe` for accepted input.

    Returns:
        A header line followed by one labelled line per reading frame.

    Raises:
        ValueError: propagated from :func:`transcribe` for invalid bases.
    """
    orf_list = [_translate_frame(frame) for frame in transcribe(DNA)]
    report_lines = [_REPORT_HEADER]
    report_lines.extend(
        label + orf for label, orf in zip(_FRAME_LABELS, orf_list)
    )
    return '\n'.join(report_lines)


def build_interface():
    """Create the Gradio interface that wraps :func:`get_protein_from_DNA`."""
    # Imported here so the sequence functions above can be imported and
    # tested without Gradio installed and without starting a web server.
    import gradio as gr

    # NOTE(review): gr.inputs / gr.outputs and the theme/layout arguments look
    # like the legacy Gradio API - confirm against the pinned Gradio version.
    ## configure inputs/outputs
    input_module = gr.inputs.Textbox(label="Enter your DNA sequence in the box:")
    output_module = gr.outputs.Textbox(label='Protein sequences from ORFs 1-6:')

    ### configure gradio, details can be found at https://www.gradio.app/docs/#i_slider
    return gr.Interface(
        fn=get_protein_from_DNA,
        inputs=input_module,
        outputs=output_module,
        title="CSCI1020 Demo 2: Web Application for DNA Transcription & RNA Translation",
        description="Click examples below for a quick demo",
        theme='huggingface',
        layout='vertical',
    )


if __name__ == "__main__":
    interface = build_interface()
    interface.launch()