Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
# Complementary RNA base for each RNA base; transcribe() uses it to build the
# complement of the reversed strand.
base_pairing = {'A':'U', 'U':'A', 'C':'G','G':'C'}
|
4 |
+
|
5 |
+
#def transcribe(seq):
|
6 |
+
#RNA_seq = seq.replace('T', 'U')
|
7 |
+
### Define location of start codon (AUG)
|
8 |
+
#start_codon = RNA_seq.find('AUG')
|
9 |
+
#codons = []
|
10 |
+
|
11 |
+
#for i in range(start_codon, len(RNA_seq), 3):
|
12 |
+
#codon = RNA_seq[i:i + 3]
|
13 |
+
#if len(codon)==3:
|
14 |
+
#codons.append(codon)
|
15 |
+
#return codons
|
16 |
+
|
17 |
+
#######################################################################################
|
18 |
+
# define dictionary to translate three-letter mRNA to three-letter amino acid here
|
19 |
+
#######################################################################################
|
20 |
+
|
21 |
+
# RNA codon -> three-letter amino-acid name. The three termination codons
# (UAA, UGA, UAG) map to 'Stop'. Entries are grouped by the first base of the
# codon; within a group each row shares the third base.
translate = {
    # First base U
    'UUU': 'Phe', 'UCU': 'Ser', 'UAU': 'Tyr', 'UGU': 'Cys',
    'UUC': 'Phe', 'UCC': 'Ser', 'UAC': 'Tyr', 'UGC': 'Cys',
    'UUA': 'Leu', 'UCA': 'Ser', 'UAA': 'Stop', 'UGA': 'Stop',
    'UUG': 'Leu', 'UCG': 'Ser', 'UAG': 'Stop', 'UGG': 'Trp',
    # First base C
    'CUU': 'Leu', 'CCU': 'Pro', 'CAU': 'His', 'CGU': 'Arg',
    'CUC': 'Leu', 'CCC': 'Pro', 'CAC': 'His', 'CGC': 'Arg',
    'CUA': 'Leu', 'CCA': 'Pro', 'CAA': 'Gln', 'CGA': 'Arg',
    'CUG': 'Leu', 'CCG': 'Pro', 'CAG': 'Gln', 'CGG': 'Arg',
    # First base A
    'AUU': 'Ile', 'ACU': 'Thr', 'AAU': 'Asn', 'AGU': 'Ser',
    'AUC': 'Ile', 'ACC': 'Thr', 'AAC': 'Asn', 'AGC': 'Ser',
    'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg',
    'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg',
    # First base G
    'GUU': 'Val', 'GCU': 'Ala', 'GAU': 'Asp', 'GGU': 'Gly',
    'GUC': 'Val', 'GCC': 'Ala', 'GAC': 'Asp', 'GGC': 'Gly',
    'GUA': 'Val', 'GCA': 'Ala', 'GAA': 'Glu', 'GGA': 'Gly',
    'GUG': 'Val', 'GCG': 'Ala', 'GAG': 'Glu', 'GGG': 'Gly',
}
|
39 |
+
|
40 |
+
#######################################################################################
|
41 |
+
# define dictionary to map three-letter amino acid to one-letter amino acid here
|
42 |
+
#######################################################################################
|
43 |
+
|
44 |
+
# Three-letter amino-acid name -> one-letter code, in alphabetical order of
# the three-letter name. There is deliberately no entry for 'Stop'; callers
# must handle termination codons before looking a residue up here.
AA_mapping = {
    'Ala': 'A',
    'Arg': 'R',
    'Asn': 'N',
    'Asp': 'D',
    'Cys': 'C',
    'Gln': 'Q',
    'Glu': 'E',
    'Gly': 'G',
    'His': 'H',
    'Ile': 'I',
    'Leu': 'L',
    'Lys': 'K',
    'Met': 'M',
    'Phe': 'F',
    'Pro': 'P',
    'Ser': 'S',
    'Thr': 'T',
    'Trp': 'W',
    'Tyr': 'Y',
    'Val': 'V',
}
|
53 |
+
|
54 |
+
def _split_codons(sequence, offset):
    """Return the complete three-base codons of *sequence* from *offset* on."""
    # Stopping at len - 2 guarantees every slice is a full codon, so a
    # trailing fragment of one or two bases is never emitted.
    return [sequence[i:i + 3] for i in range(offset, len(sequence) - 2, 3)]


def transcribe(seq):
    """Split a DNA sequence into its six reading frames of RNA codons.

    The sequence is converted to RNA (T -> U) and read in three frames
    (offsets 0, 1, 2); the same is then done for the complement of the
    reversed sequence.

    Args:
        seq: DNA (or RNA) sequence. Letter case and any whitespace
            (spaces, line breaks from a pasted sequence) are ignored.

    Returns:
        A list of six lists of three-letter codon strings: forward frames
        1-3 followed by reverse-strand frames 1-3. Incomplete trailing
        codons are dropped, so an input shorter than three bases yields
        six empty lists.

    Raises:
        KeyError: if the sequence contains a character other than
            A, C, G, T or U (after normalisation).
    """
    # Normalise first: text pasted into a textbox is often lower-case or
    # wrapped over several lines, which would otherwise fail the
    # base_pairing lookup below.
    RNA_seq = ''.join(seq.split()).upper().replace('T', 'U')

    # Complement of the reversed strand, so it is also read 5' -> 3'.
    reverse_strand = ''.join(base_pairing[base] for base in reversed(RNA_seq))

    reading_frames = []
    for strand in (RNA_seq, reverse_strand):
        reading_frames.extend(_split_codons(strand, offset) for offset in range(3))
    return reading_frames
|
82 |
+
|
83 |
+
def get_protein_from_DNA(DNA):
    """Translate all six reading frames of a DNA sequence into a text report.

    Every codon of every frame is translated to its one-letter amino acid.
    Open reading frames are delimited inline: '<' is inserted before the
    'M' of the first start codon (AUG) seen while no ORF is open, and '>'
    replaces the stop codon (UAA, UGA, UAG) that closes it. Stop codons met
    outside an ORF are skipped. A frame that contains at least one start
    codon is prefixed with '*'.

    Args:
        DNA: the DNA sequence to analyse, passed unchanged to transcribe().

    Returns:
        A multi-line string: a legend line followed by one labelled line
        for each of the six reading frames.

    Raises:
        KeyError: propagated from the lookup tables when the sequence
            contains a base they do not know.
    """
    stop_codons = ('UAA', 'UGA', 'UAG')
    frame_labels = (
        "5'to 3' Frame 1:",
        "5'to 3' Frame 2:",
        "5'to 3' Frame 3:",
        "3'to 5' Frame 1:",
        "3'to 5' Frame 2:",
        "3'to 5' Frame 3:",
    )

    orf_list = []
    for codons in transcribe(DNA):
        pieces = []
        in_orf = False      # currently between a start and a stop codon
        has_start = False   # at least one start codon seen in this frame

        for codon in codons:
            if codon in stop_codons:
                # A stop codon never yields a residue; it only closes an
                # ORF that is currently open.
                if in_orf:
                    pieces.append('>')
                    in_orf = False
                continue
            if codon == 'AUG' and not in_orf:
                pieces.append('<')
                in_orf = True
                has_start = True
            pieces.append(AA_mapping[translate[codon]])

        if has_start:
            # Close an ORF that ran off the end of the sequence. An ORF
            # already closed by a stop codon must not get a second '>'.
            if in_orf:
                pieces.append('>')
            pieces.insert(0, '*')
        orf_list.append(''.join(pieces))

    legend = " * means the ORF sequence exists and highlighted in as <>; "
    return legend + ''.join(
        "\n" + label + orf for label, orf in zip(frame_labels, orf_list)
    )
|
109 |
+
|
110 |
+
|
111 |
+
def highlight_text(string_in):
    """Print *string_in* with its ORF markers turned into ANSI colour codes.

    Every '<' is replaced by the escape sequences for bold and bright-blue
    text, and every '>' by the reset sequence, so the marked spans stand
    out on an ANSI-capable terminal.

    Args:
        string_in: text using '<' and '>' as highlight delimiters, e.g. the
            report returned by get_protein_from_DNA().

    Returns:
        None. The converted text is written to standard output.
    """
    bold_blue = '\033[1m\033[94m'
    reset = '\033[0m'
    print(string_in.replace('<', bold_blue).replace('>', reset))


# NOTE: the module-level demo calls `results = get_protein_from_DNA(DNA)` and
# `highlight_text(results)` were removed. `DNA` is never defined in this
# file, so they raised NameError at import time and stopped the web
# interface from ever launching. Call highlight_text() manually with the
# result of get_protein_from_DNA(<sequence>) for a terminal preview.
|
119 |
+
|
120 |
+
|
121 |
+
##configure inputs/outputs
|
122 |
+
|
123 |
+
# Single free-text input (the DNA sequence) and single text output (the
# six-frame translation report).
# NOTE(review): gr.inputs / gr.outputs and the `theme` / `layout` arguments
# appear to be the legacy Gradio API; newer releases expose gr.Textbox
# directly. Confirm the Gradio version pinned for this Space before upgrading.
input_module = gr.inputs.Textbox(label="Enter your DNA sequence in the box:")
output_module = gr.outputs.Textbox(label='Protein sequence')

### configure gradio, details can be found at https://www.gradio.app/docs/#i_slider
interface = gr.Interface(
    fn=get_protein_from_DNA,
    inputs=input_module,
    outputs=output_module,
    # Title typo fixed: "Transcrioption" -> "Transcription".
    title="CSCI1020 Demo 2: Web Application for DNA Transcription & RNA Translation",
    description="Click examples below for a quick demo",
    theme='huggingface',
    layout='vertical',
)
# Start the web server; this call blocks while the app is being served.
interface.launch()
|
136 |
+
|
137 |
+
|
138 |
+
|