File size: 4,626 Bytes
9fd0da5 610475a 9fd0da5 610475a 9fd0da5 610475a 9fd0da5 a43920a 9fd0da5 610475a 9fd0da5 610475a 9fd0da5 e3cd57f 9fd0da5 e3cd57f 9fd0da5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import gradio as gr
# Complementary RNA base for each base (A pairs with U, C pairs with G).
base_pairing = {
    'A': 'U',
    'U': 'A',
    'C': 'G',
    'G': 'C',
}
def transcribe(seq):
    """Transcribe a DNA string and split it into codons from the first AUG.

    Every 'T' in *seq* is replaced with 'U'. The result is then read in
    steps of three characters starting at the first occurrence of 'AUG';
    a trailing fragment shorter than three characters is dropped.

    NOTE: another function named ``transcribe`` is defined later in this
    file; once the whole module has run, that later definition is the one
    bound to the name.

    Args:
        seq: DNA sequence as a string.

    Returns:
        A list of three-character codon strings whose first element is
        'AUG', or an empty list when the sequence contains no 'AUG'.
    """
    RNA_seq = seq.replace('T', 'U')
    start_codon = RNA_seq.find('AUG')
    if start_codon == -1:
        # str.find reports "not found" as -1. Using -1 as the range start
        # would slice from a negative index and return out-of-frame codons.
        return []
    # Stopping at len - 2 guarantees every slice is a complete triplet.
    return [RNA_seq[i:i + 3] for i in range(start_codon, len(RNA_seq) - 2, 3)]
# ---------------------------------------------------------------------------
# Codon table: three-letter mRNA codon -> three-letter amino-acid name.
# The stop codons UAA, UAG and UGA map to the string 'Stop'.
# Each row shares its first and third base; the second base runs U, C, A, G
# across the columns.
# ---------------------------------------------------------------------------
translate = {
    # first base U
    'UUU': 'Phe', 'UCU': 'Ser', 'UAU': 'Tyr', 'UGU': 'Cys',
    'UUC': 'Phe', 'UCC': 'Ser', 'UAC': 'Tyr', 'UGC': 'Cys',
    'UUA': 'Leu', 'UCA': 'Ser', 'UAA': 'Stop', 'UGA': 'Stop',
    'UUG': 'Leu', 'UCG': 'Ser', 'UAG': 'Stop', 'UGG': 'Trp',
    # first base C
    'CUU': 'Leu', 'CCU': 'Pro', 'CAU': 'His', 'CGU': 'Arg',
    'CUC': 'Leu', 'CCC': 'Pro', 'CAC': 'His', 'CGC': 'Arg',
    'CUA': 'Leu', 'CCA': 'Pro', 'CAA': 'Gln', 'CGA': 'Arg',
    'CUG': 'Leu', 'CCG': 'Pro', 'CAG': 'Gln', 'CGG': 'Arg',
    # first base A
    'AUU': 'Ile', 'ACU': 'Thr', 'AAU': 'Asn', 'AGU': 'Ser',
    'AUC': 'Ile', 'ACC': 'Thr', 'AAC': 'Asn', 'AGC': 'Ser',
    'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg',
    'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg',
    # first base G
    'GUU': 'Val', 'GCU': 'Ala', 'GAU': 'Asp', 'GGU': 'Gly',
    'GUC': 'Val', 'GCC': 'Ala', 'GAC': 'Asp', 'GGC': 'Gly',
    'GUA': 'Val', 'GCA': 'Ala', 'GAA': 'Glu', 'GGA': 'Gly',
    'GUG': 'Val', 'GCG': 'Ala', 'GAG': 'Glu', 'GGG': 'Gly',
}
# ---------------------------------------------------------------------------
# Three-letter amino-acid name -> one-letter symbol.
# There is deliberately no entry for 'Stop'.
# ---------------------------------------------------------------------------
AA_mapping = {
    'Ala': 'A',
    'Arg': 'R',
    'Asn': 'N',
    'Asp': 'D',
    'Cys': 'C',
    'Gln': 'Q',
    'Glu': 'E',
    'Gly': 'G',
    'His': 'H',
    'Ile': 'I',
    'Leu': 'L',
    'Lys': 'K',
    'Met': 'M',
    'Phe': 'F',
    'Pro': 'P',
    'Ser': 'S',
    'Thr': 'T',
    'Trp': 'W',
    'Tyr': 'Y',
    'Val': 'V',
}
def transcribe(seq):
    """Transcribe DNA to RNA and split it into six reading frames.

    The input is first normalised: all whitespace is removed and letters
    are upper-cased, so sequences typed in lower case or pasted with spaces
    or line breaks are accepted. Every 'T' is then replaced with 'U'.

    Frames are produced for the forward strand (offsets 0, 1, 2) and then
    for the reverse strand, i.e. the reversed sequence with each base
    swapped through ``base_pairing`` (offsets 0, 1, 2). Within each frame
    the strand is cut into consecutive triplets; an incomplete trailing
    fragment is dropped.

    Args:
        seq: DNA (or RNA) sequence as a string.

    Returns:
        A list of six lists of three-character codon strings, ordered
        forward frames 1-3 followed by reverse frames 1-3.

    Raises:
        KeyError: if, after normalisation, the sequence contains a
            character that is not a key of ``base_pairing``.
    """
    # str.split() with no argument splits on any whitespace run, so joining
    # the pieces strips spaces, tabs and newlines wherever they occur.
    RNA_seq = ''.join(seq.split()).upper().replace('T', 'U')

    # Reverse strand: walk the sequence backwards and complement each base.
    reverse_strand = ''.join(base_pairing[base] for base in reversed(RNA_seq))

    reading_frames = []
    for strand in (RNA_seq, reverse_strand):
        for pos in range(3):
            # Stopping at len - 2 keeps only complete triplets.
            reading_frames.append(
                [strand[i:i + 3] for i in range(pos, len(strand) - 2, 3)]
            )
    return reading_frames
def get_protein_from_DNA(DNA):
    """Translate a DNA sequence in six reading frames into a text report.

    The sequence is passed to ``transcribe`` to obtain the reading frames.
    Each frame is translated codon by codon to one-letter amino-acid
    symbols via ``translate`` and ``AA_mapping``. An open reading frame
    (ORF) starts at the first 'AUG' seen while no ORF is open and is marked
    with '<'; it ends at the next stop codon (UAA, UGA, UAG) and is marked
    with '>'. Stop codons themselves produce no symbol. An ORF still open
    at the end of a frame is closed with '>' there. A frame containing at
    least one 'M' is prefixed with '*'.

    Args:
        DNA: DNA sequence as a string.

    Returns:
        A multi-line string: a legend line followed by one labelled line
        per reading frame.

    Raises:
        KeyError: if a codon is not a key of ``translate``.
    """
    stop_codons = ('UAA', 'UGA', 'UAG')
    frame_labels = (
        "5'to 3' Frame 1:",
        "5'to 3' Frame 2:",
        "5'to 3' Frame 3:",
        "3'to 5' Frame 1:",
        "3'to 5' Frame 2:",
        "3'to 5' Frame 3:",
    )

    orf_list = []
    for mRNA in transcribe(DNA):
        symbols = []
        found_start = False
        for codon in mRNA:
            if codon == 'AUG' and not found_start:
                symbols.append('<')
                found_start = True
            if codon in stop_codons:
                if found_start:
                    symbols.append('>')
                    found_start = False
                # 'Stop' has no one-letter symbol in AA_mapping, so skip it.
                continue
            symbols.append(AA_mapping[translate[codon]])
        if found_start:
            # Close only an ORF that is still open; adding '>' whenever the
            # frame contains 'M' would leave a stray '>' after ORFs that a
            # stop codon has already closed.
            symbols.append('>')
        one_letter = ''.join(symbols)
        # mark the frames in which an ORF was found
        if 'M' in one_letter:
            one_letter = '*' + one_letter
        orf_list.append(one_letter)

    r = " * means the ORF sequence exists and highlighted in as <>; " + ''.join(
        '\n' + label + orf for label, orf in zip(frame_labels, orf_list)
    )
    return r
#results = get_protein_from_DNA(DNA)
#def highlight_text(string_in):
#string_in=string_in.replace('<','\033[1m\033[94m')
#string_in=string_in.replace('>','\033[0m')
#print(string_in)
#highlight_text(results)
# Text boxes used for the app's input and output.
input_module = gr.inputs.Textbox(label="Enter your DNA sequence in the box:")
output_module = gr.outputs.Textbox(label='Protein sequences from ORFs 1-6:')

# Wire get_protein_from_DNA into a Gradio interface.
# Option reference: https://www.gradio.app/docs/#i_slider
# NOTE(review): the description mentions examples, but no `examples=`
# argument is passed to gr.Interface here.
interface = gr.Interface(
    fn=get_protein_from_DNA,
    inputs=input_module,
    outputs=output_module,
    title="CSCI1020 Demo 2: Web Application for DNA Transcription & RNA Translation",
    description="Click examples below for a quick demo",
    theme='huggingface',
    layout='vertical',
)

# Start serving the interface.
interface.launch()
|