kipfriend committed on
Commit
9fd0da5
1 Parent(s): 3946e0c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
# Watson-Crick complement of each RNA base (A<->U, C<->G).
base_pairing = dict(zip('AUCG', 'UAGC'))
4
+
5
+ #def transcribe(seq):
6
+ #RNA_seq = seq.replace('T', 'U')
7
+ ### Define location of start codon (AUG)
8
+ #start_codon = RNA_seq.find('AUG')
9
+ #codons = []
10
+
11
+ #for i in range(start_codon, len(RNA_seq), 3):
12
+ #codon = RNA_seq[i:i + 3]
13
+ #if len(codon)==3:
14
+ #codons.append(codon)
15
+ #return codons
16
+
17
+ #######################################################################################
18
+ # define dictionary to translate three-letter mRNA to three-letter amino acid here
19
+ #######################################################################################
20
+
21
# Codon -> three-letter amino acid ('Stop' for the three stop codons).
# The residue table below is laid out exactly like the classic codon chart:
# one block of four rows per first base (U, C, A, G), one row per third base
# (U, C, A, G), and one column per second base (U, C, A, G).
translate = dict(zip(
    (
        first + second + third
        for first in 'UCAG'
        for third in 'UCAG'
        for second in 'UCAG'
    ),
    """
    Phe Ser Tyr  Cys
    Phe Ser Tyr  Cys
    Leu Ser Stop Stop
    Leu Ser Stop Trp

    Leu Pro His  Arg
    Leu Pro His  Arg
    Leu Pro Gln  Arg
    Leu Pro Gln  Arg

    Ile Thr Asn  Ser
    Ile Thr Asn  Ser
    Ile Thr Lys  Arg
    Met Thr Lys  Arg

    Val Ala Asp  Gly
    Val Ala Asp  Gly
    Val Ala Glu  Gly
    Val Ala Glu  Gly
    """.split(),
))
39
+
40
+ #######################################################################################
41
+ # define dictionary to map three-letter amino acid to one-letter amino acid here
42
+ #######################################################################################
43
+
44
# Three-letter amino acid name -> one-letter code. The names are listed
# alphabetically and paired positionally with the letters in the string below.
AA_mapping = dict(zip(
    (
        'Ala', 'Arg', 'Asn', 'Asp', 'Cys',
        'Gln', 'Glu', 'Gly', 'His', 'Ile',
        'Leu', 'Lys', 'Met', 'Phe', 'Pro',
        'Ser', 'Thr', 'Trp', 'Tyr', 'Val',
    ),
    'ARNDCQEGHILKMFPSTWYV',
))
53
+
54
def _split_codons(strand):
    """Return the complete codons of the three reading frames of ``strand``.

    The result is a list of three lists (frame offsets 0, 1 and 2); trailing
    bases that do not fill a whole codon are dropped.
    """
    return [
        [strand[i:i + 3] for i in range(offset, len(strand) - 2, 3)]
        for offset in range(3)
    ]


def transcribe(seq):
    """Split a DNA sequence into the codons of all six reading frames.

    The sequence is converted to RNA (T -> U) and cut into codons at offsets
    0, 1 and 2; the same is then done for its reverse complement.

    Args:
        seq: DNA (or RNA) sequence as a string. Case and any whitespace
            (spaces, line breaks) are ignored.

    Returns:
        A list of six codon lists: forward frames 1-3 followed by
        reverse-complement frames 1-3.

    Raises:
        KeyError: if the sequence contains a character other than
            A, C, G, T or U.
    """
    # Sequences pasted into a text box are routinely lower-case or wrapped
    # over several lines; normalise first so they do not fail the base lookup.
    rna = ''.join(seq.split()).upper().replace('T', 'U')

    # Reverse strand: read the sequence backwards and pair every base.
    reverse_complement = ''.join(base_pairing[base] for base in reversed(rna))

    return _split_codons(rna) + _split_codons(reverse_complement)
82
+
83
def _annotate_frame(codons):
    """Translate one reading frame into one-letter residues with ORF markers.

    ``<`` is placed before the first start codon (AUG) of each ORF and ``>``
    where a stop codon ends it. Stop codons themselves are never emitted.
    A frame that contains at least one start codon is prefixed with ``*``.
    """
    residues = []
    in_orf = False
    for codon in codons:
        if translate[codon] == 'Stop':
            if in_orf:
                residues.append('>')
                in_orf = False
            continue
        if codon == 'AUG' and not in_orf:
            residues.append('<')
            in_orf = True
        residues.append(AA_mapping[translate[codon]])

    has_start = 'M' in residues
    # Close an ORF that runs off the end of the frame. Only do so when it is
    # still open: a frame already closed by a stop codon must not receive a
    # second, unbalanced '>'.
    if in_orf:
        residues.append('>')

    annotated = ''.join(residues)
    return '*' + annotated if has_start else annotated


def get_protein_from_DNA(DNA):
    """Translate a DNA sequence in all six reading frames.

    Args:
        DNA: the DNA sequence as a string.

    Returns:
        A multi-line report: a legend line followed by one line per reading
        frame (three forward, three reverse) holding the one-letter protein
        sequence with ORFs wrapped in ``<`` ... ``>``.

    Raises:
        KeyError: if a codon is not present in the codon table (for example
            because the sequence contains an unexpected character).
    """
    orf_list = [_annotate_frame(frame) for frame in transcribe(DNA)]

    legend = " * means the ORF sequence exists and highlighted in as <>; "
    labels = (
        "5'to 3' Frame 1:",
        "5'to 3' Frame 2:",
        "5'to 3' Frame 3:",
        "3'to 5' Frame 1:",
        "3'to 5' Frame 2:",
        "3'to 5' Frame 3:",
    )
    lines = [legend]
    lines.extend(label + orf for label, orf in zip(labels, orf_list))
    return "\n".join(lines)
109
+
110
+
111
# Console demo. ``DNA`` is not defined anywhere in this file, so the call
# below raised NameError at import time and nothing after it ever ran.
# A short sample sequence is supplied so the script can start.
DNA = 'ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG'
results = get_protein_from_DNA(DNA)
112
+
113
def highlight_text(string_in):
    """Print ``string_in`` with ORF markers turned into ANSI terminal styling.

    Every ``<`` is replaced by the escape codes ESC[1m ESC[94m and every
    ``>`` by ESC[0m; the converted text is written to standard output.
    Nothing is returned.
    """
    ansi_markers = str.maketrans({
        '<': '\033[1m\033[94m',
        '>': '\033[0m',
    })
    print(string_in.translate(ansi_markers))
117
+
118
# Echo the six-frame report to the console with ORFs highlighted.
highlight_text(results)
119
+
120
+
121
+ ##configure inputs/outputs
122
+
123
# Text widgets for the web form: one box for the DNA input, one for the report.
# NOTE(review): ``gr.inputs`` / ``gr.outputs`` and the ``theme`` / ``layout``
# keywords look like the legacy Gradio API - confirm the Gradio version this
# app is pinned to before modernising them.
input_module = gr.inputs.Textbox(label="Enter your DNA sequence in the box:")
output_module = gr.outputs.Textbox(label='Protein sequence')

# Configure the Gradio interface; details at https://www.gradio.app/docs/
interface = gr.Interface(
    fn=get_protein_from_DNA,
    inputs=input_module,
    outputs=output_module,
    title="CSCI1020 Demo 2: Web Application for DNA Transcription & RNA Translation",
    description="Click examples below for a quick demo",
    # The description invites users to click an example, so provide some
    # (one inner list per example, one entry per input component).
    examples=[
        ["ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG"],
        ["TTATGACCGGTAAGTAGCATGCCAAATGA"],
    ],
    theme='huggingface',
    layout='vertical',
)
interface.launch()
136
+
137
+
138
+