File size: 4,626 Bytes
9fd0da5
 
 
 
610475a
 
9fd0da5
610475a
 
9fd0da5
610475a
 
 
 
 
9fd0da5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a43920a
9fd0da5
610475a
 
 
 
9fd0da5
610475a
9fd0da5
 
 
 
 
e3cd57f
9fd0da5
 
 
 
 
e3cd57f
9fd0da5
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import gradio as gr

# Watson-Crick complement of each RNA base (A<->U, C<->G).
base_pairing = dict(zip('AUCG', 'UAGC'))

def transcribe(seq):
    """Transcribe DNA to mRNA and split it into codons from the first AUG.

    NOTE: a second ``transcribe`` defined further down in this module
    rebinds the name, so this single-frame version is not the one in effect
    once the whole module has been executed.

    Args:
        seq: DNA sequence written with the letters A, C, G and T.

    Returns:
        List of complete three-letter codons, beginning with the first
        ``AUG`` and continuing in that reading frame to the end of the
        sequence. A trailing partial codon is dropped. An empty list is
        returned when the sequence has no start codon.
    """
    rna = seq.replace('T', 'U')
    start = rna.find('AUG')
    if start == -1:
        # str.find signals "not found" with -1; using it as a slice start
        # would silently read codons from an unrelated frame.
        return []
    # Stopping at len - 2 guarantees every slice holds exactly three bases.
    return [rna[i:i + 3] for i in range(start, len(rna) - 2, 3)]

####################################################################################### 
# define dictionary to translate three-letter mRNA to three-letter amino acid here 
#######################################################################################

# Codon table: three-letter mRNA codon -> three-letter amino-acid code.
# The three termination codons map to the marker "Stop".
translate = {
    # first base U
    "UUU": "Phe", "UCU": "Ser", "UAU": "Tyr", "UGU": "Cys",
    "UUC": "Phe", "UCC": "Ser", "UAC": "Tyr", "UGC": "Cys",
    "UUA": "Leu", "UCA": "Ser", "UAA": "Stop", "UGA": "Stop",
    "UUG": "Leu", "UCG": "Ser", "UAG": "Stop", "UGG": "Trp",
    # first base C
    "CUU": "Leu", "CCU": "Pro", "CAU": "His", "CGU": "Arg",
    "CUC": "Leu", "CCC": "Pro", "CAC": "His", "CGC": "Arg",
    "CUA": "Leu", "CCA": "Pro", "CAA": "Gln", "CGA": "Arg",
    "CUG": "Leu", "CCG": "Pro", "CAG": "Gln", "CGG": "Arg",
    # first base A
    "AUU": "Ile", "ACU": "Thr", "AAU": "Asn", "AGU": "Ser",
    "AUC": "Ile", "ACC": "Thr", "AAC": "Asn", "AGC": "Ser",
    "AUA": "Ile", "ACA": "Thr", "AAA": "Lys", "AGA": "Arg",
    "AUG": "Met", "ACG": "Thr", "AAG": "Lys", "AGG": "Arg",
    # first base G
    "GUU": "Val", "GCU": "Ala", "GAU": "Asp", "GGU": "Gly",
    "GUC": "Val", "GCC": "Ala", "GAC": "Asp", "GGC": "Gly",
    "GUA": "Val", "GCA": "Ala", "GAA": "Glu", "GGA": "Gly",
    "GUG": "Val", "GCG": "Ala", "GAG": "Glu", "GGG": "Gly",
}

#######################################################################################
# define dictionary to map three-letter amino acid to one-letter amino acid here
#######################################################################################

# Three-letter amino-acid code -> one-letter code for the 20 standard
# amino acids. The "Stop" marker of the codon table has no entry here.
AA_mapping = {
    "Ala": "A", "Arg": "R", "Asn": "N", "Asp": "D",
    "Cys": "C", "Gln": "Q", "Glu": "E", "Gly": "G",
    "His": "H", "Ile": "I", "Leu": "L", "Lys": "K",
    "Met": "M", "Phe": "F", "Pro": "P", "Ser": "S",
    "Thr": "T", "Trp": "W", "Tyr": "Y", "Val": "V",
}

# Same pairing as the module-level ``base_pairing`` dict, precompiled so the
# whole strand can be complemented in one str.translate pass.
_RNA_COMPLEMENT = str.maketrans('ACGU', 'UGCA')


def _split_codons(strand, offset):
    """Return the complete three-base codons of *strand* starting at *offset*."""
    # Stopping at len - 2 drops a trailing partial codon.
    return [strand[i:i + 3] for i in range(offset, len(strand) - 2, 3)]


def transcribe(seq):
    """Transcribe DNA to RNA and split it into all six reading frames.

    The input is normalised first: whitespace (spaces, line breaks from a
    pasted sequence) is removed and letters are upper-cased, then every
    ``T`` is replaced by ``U``.

    Args:
        seq: DNA sequence using the letters A, C, G, T (any case).

    Returns:
        A list of six codon lists. Entries 0-2 are the given strand read
        from offsets 0, 1 and 2; entries 3-5 are the reverse complement
        read from offsets 0, 1 and 2. Trailing partial codons are dropped.

    Raises:
        KeyError: if the sequence contains a character other than
            A, C, G, T/U after normalisation. KeyError is kept because
            that is what the original per-base dictionary lookup raised.
    """
    rna = ''.join(seq.split()).upper().replace('T', 'U')

    unknown = set(rna) - set('ACGU')
    if unknown:
        raise KeyError(
            'unsupported base(s) in sequence: ' + ', '.join(sorted(unknown))
        )

    reverse_complement = rna[::-1].translate(_RNA_COMPLEMENT)

    reading_frames = []
    for strand in (rna, reverse_complement):
        for offset in range(3):
            reading_frames.append(_split_codons(strand, offset))
    return reading_frames

def get_protein_from_DNA(DNA):
    """Translate a DNA sequence in all six reading frames into a text report.

    Each frame returned by ``transcribe`` is translated codon by codon into
    one-letter amino-acid codes. An open reading frame starts at the first
    ``AUG`` seen while no ORF is open (marked ``<``) and ends at the next
    stop codon (marked ``>``). Stop codons themselves produce no letter.
    An ORF still open at the end of the frame is closed with ``>`` so the
    markers always balance; a frame containing at least one ORF is prefixed
    with ``*``.

    Args:
        DNA: DNA sequence as accepted by ``transcribe``.

    Returns:
        A single string: a legend line followed by one line per frame
        (three for the 5'->3' strand, then three for the 3'->5' strand).

    Raises:
        KeyError: if a codon is missing from ``translate`` / ``AA_mapping``
            or ``transcribe`` rejects the sequence.
    """
    stop_codons = ('UAA', 'UGA', 'UAG')
    frame_labels = (
        "5'to 3' Frame 1:",
        "5'to 3' Frame 2:",
        "5'to 3' Frame 3:",
        "3'to 5' Frame 1:",
        "3'to 5' Frame 2:",
        "3'to 5' Frame 3:",
    )

    report = [" * means the ORF sequence exists and highlighted in as <>; "]
    for label, codons in zip(frame_labels, transcribe(DNA)):
        pieces = []
        in_orf = False   # currently between a start codon and its stop
        has_orf = False  # at least one start codon was seen in this frame
        for codon in codons:
            if codon in stop_codons:
                if in_orf:
                    pieces.append('>')
                    in_orf = False
                continue
            if codon == 'AUG' and not in_orf:
                pieces.append('<')
                in_orf = True
                has_orf = True
            pieces.append(AA_mapping[translate[codon]])

        # Close only an ORF that ran off the end of the frame; appending
        # '>' unconditionally left already-closed ORFs with a stray '>'.
        if in_orf:
            pieces.append('>')

        protein = ''.join(pieces)
        if has_orf:
            protein = '*' + protein
        report.append(label + protein)

    return "\n".join(report)


#results = get_protein_from_DNA(DNA)

#def highlight_text(string_in):
  #string_in=string_in.replace('<','\033[1m\033[94m')
  #string_in=string_in.replace('>','\033[0m')
  #print(string_in)

#highlight_text(results)


## Configure the inputs/outputs: one text box for the DNA sequence and one
## text box for the six-frame translation report.
# NOTE(review): the gr.inputs / gr.outputs namespaces appear to be deprecated
# in newer Gradio releases (gr.Textbox is the replacement) -- confirm against
# the Gradio version this app is pinned to before upgrading.

input_module = gr.inputs.Textbox(label = "Enter your DNA sequence in the box:")
output_module = gr.outputs.Textbox(label = 'Protein sequences from ORFs 1-6:')

### Configure Gradio; details can be found at https://www.gradio.app/docs/#i_slider
# The interface calls get_protein_from_DNA with the text-box content and shows
# the returned string.
# NOTE(review): the description mentions examples, but no `examples=` argument
# is passed here -- either supply examples or reword the description.
# NOTE(review): `theme` and `layout` may not be accepted by newer Gradio
# versions -- TODO confirm.
interface = gr.Interface(fn=get_protein_from_DNA, 
                         inputs=input_module, 
                         outputs=output_module,
                         title="CSCI1020 Demo 2: Web Application for DNA Transcription & RNA Translation", 
                         description= "Click examples below for a quick demo",
                         theme = 'huggingface',
                         layout = 'vertical'
                         )
# Launching happens at module level, so running (or importing) this file
# starts the web app.
interface.launch()