# kipfriend's picture
# Update app.py
# e3cd57f
import gradio as gr
# Complementary RNA base for each base; looked up one base at a time when
# the reverse strand is built.
base_pairing = {
    'A': 'U',
    'U': 'A',
    'C': 'G',
    'G': 'C',
}
def transcribe(seq):
    """Transcribe a DNA string and split it into codons from the first AUG.

    Every ``T`` in *seq* is replaced by ``U``; the resulting RNA is then read
    in steps of three bases starting at the first ``AUG``.  A trailing
    fragment shorter than three bases is dropped.  Matching is
    case-sensitive.

    NOTE(review): a second ``transcribe`` is defined further down in this
    file and rebinds the name, so this version appears to be unused.

    Args:
        seq: DNA sequence string.

    Returns:
        List of three-letter codon strings beginning with ``'AUG'``, or an
        empty list when the sequence contains no start codon.
    """
    rna = seq.replace('T', 'U')
    start = rna.find('AUG')
    if start == -1:
        # str.find reports "not found" as -1; using that as the range start
        # would silently read codons from the wrong offset.
        return []
    # Stop at len - 2 so only complete three-base codons are produced.
    return [rna[i:i + 3] for i in range(start, len(rna) - 2, 3)]
#######################################################################################
# define dictionary to translate three-letter mRNA to three-letter amino acid here
#######################################################################################
# mRNA codon -> three-letter amino-acid name; the three termination codons
# map to 'Stop'.  Within a row the second base runs U, C, A, G; each group
# of four rows shares the first base and steps the third base U, C, A, G.
translate = {
    # first base U
    'UUU': 'Phe', 'UCU': 'Ser', 'UAU': 'Tyr', 'UGU': 'Cys',
    'UUC': 'Phe', 'UCC': 'Ser', 'UAC': 'Tyr', 'UGC': 'Cys',
    'UUA': 'Leu', 'UCA': 'Ser', 'UAA': 'Stop', 'UGA': 'Stop',
    'UUG': 'Leu', 'UCG': 'Ser', 'UAG': 'Stop', 'UGG': 'Trp',
    # first base C
    'CUU': 'Leu', 'CCU': 'Pro', 'CAU': 'His', 'CGU': 'Arg',
    'CUC': 'Leu', 'CCC': 'Pro', 'CAC': 'His', 'CGC': 'Arg',
    'CUA': 'Leu', 'CCA': 'Pro', 'CAA': 'Gln', 'CGA': 'Arg',
    'CUG': 'Leu', 'CCG': 'Pro', 'CAG': 'Gln', 'CGG': 'Arg',
    # first base A
    'AUU': 'Ile', 'ACU': 'Thr', 'AAU': 'Asn', 'AGU': 'Ser',
    'AUC': 'Ile', 'ACC': 'Thr', 'AAC': 'Asn', 'AGC': 'Ser',
    'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg',
    'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg',
    # first base G
    'GUU': 'Val', 'GCU': 'Ala', 'GAU': 'Asp', 'GGU': 'Gly',
    'GUC': 'Val', 'GCC': 'Ala', 'GAC': 'Asp', 'GGC': 'Gly',
    'GUA': 'Val', 'GCA': 'Ala', 'GAA': 'Glu', 'GGA': 'Gly',
    'GUG': 'Val', 'GCG': 'Ala', 'GAG': 'Glu', 'GGG': 'Gly',
}
#######################################################################################
# define dictionary to map three-letter amino acid to one-letter amino acid here
#######################################################################################
# Three-letter amino-acid name -> one-letter code.  There is deliberately no
# 'Stop' entry here, so stop codons have to be filtered out before lookup.
AA_mapping = {
    'Ala': 'A',
    'Arg': 'R',
    'Asn': 'N',
    'Asp': 'D',
    'Cys': 'C',
    'Gln': 'Q',
    'Glu': 'E',
    'Gly': 'G',
    'His': 'H',
    'Ile': 'I',
    'Leu': 'L',
    'Lys': 'K',
    'Met': 'M',
    'Phe': 'F',
    'Pro': 'P',
    'Ser': 'S',
    'Thr': 'T',
    'Trp': 'W',
    'Tyr': 'Y',
    'Val': 'V',
}
def _split_codons(strand, offset):
    """Return the complete three-base codons of *strand* starting at *offset*."""
    # Stop at len - 2 so a trailing fragment of one or two bases is dropped.
    return [strand[i:i + 3] for i in range(offset, len(strand) - 2, 3)]


def transcribe(seq):
    """Transcribe DNA into RNA and split it into all six reading frames.

    The input is normalised first: whitespace is removed and letters are
    upper-cased, so pasted, line-wrapped or lower-case sequences are handled
    instead of failing in the ``base_pairing`` lookup.  ``T`` is then
    replaced by ``U``.

    Args:
        seq: DNA (or RNA) sequence string.

    Returns:
        A list of six lists of three-letter codon strings: the forward
        strand read from offsets 0, 1 and 2, followed by the reverse
        complement read from offsets 0, 1 and 2.  Incomplete trailing codons
        are dropped.

    Raises:
        KeyError: if, after normalisation, the sequence contains a character
            that has no entry in ``base_pairing``.
    """
    rna = ''.join(seq.split()).upper().replace('T', 'U')
    # Reverse complement: walk the strand backwards and pair every base.
    reverse_strand = ''.join(base_pairing[base] for base in reversed(rna))
    # Forward frames first, then reverse frames, each at offsets 0, 1, 2.
    return [
        _split_codons(strand, offset)
        for strand in (rna, reverse_strand)
        for offset in range(3)
    ]
def _annotate_frame(codons):
    """Translate one reading frame to one-letter residues with ORF markers."""
    pieces = []
    in_orf = False
    for codon in codons:
        residue = translate[codon]
        if residue == 'Stop':
            # Stop codons are never printed; they only close an ORF that is
            # currently open.
            if in_orf:
                pieces.append('>')
                in_orf = False
            continue
        if codon == 'AUG' and not in_orf:
            pieces.append('<')
            in_orf = True
        pieces.append(AA_mapping[residue])
    if in_orf:
        # Close an ORF that runs to the end of the frame.  Appending '>'
        # unconditionally would leave an unmatched marker behind an ORF
        # that was already terminated by a stop codon.
        pieces.append('>')
    annotated = ''.join(pieces)
    if 'M' in annotated:
        # Flag frames that contain at least one start codon.
        annotated = '*' + annotated
    return annotated


def get_protein_from_DNA(DNA):
    """Translate a DNA sequence in all six reading frames and format a report.

    Each frame returned by ``transcribe`` is translated codon by codon into
    one-letter amino-acid codes.  The first ``AUG`` outside an ORF opens one
    (``<``); the next stop codon closes it (``>``).  Stop codons themselves
    are not printed.  An ORF still open at the end of a frame is closed
    there.  Frames containing a start codon are prefixed with ``*``.

    Args:
        DNA: DNA sequence string, passed unchanged to ``transcribe``.

    Returns:
        A multi-line string: a legend line followed by one labelled line per
        reading frame (three forward, three reverse).

    Raises:
        KeyError: if a codon is missing from ``translate`` (or whatever
            ``transcribe`` raises for invalid input).
    """
    # Labels follow the frame order produced by transcribe: forward strand
    # offsets first, then reverse strand offsets.
    frame_labels = (
        "5'to 3' Frame 1:",
        "5'to 3' Frame 2:",
        "5'to 3' Frame 3:",
        "3'to 5' Frame 1:",
        "3'to 5' Frame 2:",
        "3'to 5' Frame 3:",
    )
    annotated_frames = [_annotate_frame(codons) for codons in transcribe(DNA)]
    report_lines = [" * means the ORF sequence exists and highlighted in as <>; "]
    report_lines.extend(
        label + protein
        for label, protein in zip(frame_labels, annotated_frames)
    )
    return "\n".join(report_lines)
#results = get_protein_from_DNA(DNA)
#def highlight_text(string_in):
#string_in=string_in.replace('<','\033[1m\033[94m')
#string_in=string_in.replace('>','\033[0m')
#print(string_in)
#highlight_text(results)
## Configure the input and output text boxes of the web form.
# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio component
# namespaces -- confirm they exist in the gradio version this app is pinned to.
input_module = gr.inputs.Textbox(label = "Enter your DNA sequence in the box:")
output_module = gr.outputs.Textbox(label = 'Protein sequences from ORFs 1-6:')
### Configure gradio; details can be found at https://www.gradio.app/docs/#i_slider
# The interface feeds the input text box to get_protein_from_DNA and shows
# the string it returns in the output text box.
interface = gr.Interface(fn=get_protein_from_DNA,
inputs=input_module,
outputs=output_module,
title="CSCI1020 Demo 2: Web Application for DNA Transcription & RNA Translation",
# NOTE(review): the description refers to examples, but no `examples=`
# argument is passed in this call.
description= "Click examples below for a quick demo",
# NOTE(review): `theme` and `layout` presumably depend on the installed
# gradio version -- verify they are still accepted before upgrading.
theme = 'huggingface',
layout = 'vertical'
)
# Launch the app. There is no `if __name__ == "__main__"` guard, so this
# runs whenever the module is executed or imported.
interface.launch()