File size: 6,096 Bytes
4dad5f5
821ebf7
 
 
 
 
 
 
4dad5f5
821ebf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4dad5f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import gradio as gr
# Hard-coded DNA sequence written with the bases A/C/G/T.
# NOTE(review): nothing in the visible code reads this constant (get_protein_from_DNA
# takes its own `DNA` parameter) — presumably a sample input for manual testing; confirm.
DNA = 'GGTCAGAAAAAGCCCTCTCCATGTCTACTCACGATACATCCCTGAAAACCACTGAGGAAGTGGCTTTTCAGATCATCTTGCTTTGCCAGTTTGGGGTTGGGACTTTTGCCAATGTATTTCTCTTTGTCTATAATTTCTCTCCAATCTCGACTGGTTCTAAACAGAGGCCCAGACAAGTGATTTTAAGACACATGGCTGTGGCCAATGCCTTAACTCTCTTCCTCACTATATTTCCAAACAACATGATGACTTTTGCTCCAATTATTCCTCAAACTGACCTCAAATGTAAATTAGAATTCTTCACTCGCCTCGTGGCAAGAAGCACAAACTTGTGTTCAACTTGTGTTCTGAGTATCCATCAGTTTGTCACACTTGTTCCTGTTAATTCAGGTAAAGGAATACTCAGAGCAAGTGTCACAAACATGGCAAGTTATTCTTGTTACAGTTGTTGGTTCTTCAGTGTCTTAAATAACATCTACATTCCAATTAAGGTCACTGGTCCACAGTTAACAGACAATAACAATAACTCTAAAAGCAAGTTGTTCTGTTCCACTTCTGATTTCAGTGTAGGCATTGTCTTCTTGAGGTTTGCCCATGATGCCACATTCATGAGCATCATGGTCTGGACCAGTGTCTCCATGGTACTTCTCCTCCATAGACATTGTCAGAGAATGCAGTACATATTCACTCTCAATCAGGACCCCAGGGGCCAAGCAGAGACCACAGCAACCCATACTATCCTGATGCTGGTAGTCACATTTGTTGGCTTTTATCTTCTAAGTCTTATTTGTATCATCTTTTACACCTATTTTATATATTCTCATCATTCCCTGAGGCATTGCAATGACATTTTGGTTTCGGGTTTCCCTACAATTTCTCCTTTACTGTTGACCTTCAGAGACCCTAAGGGTCCTTGTTCTGTGTTCTTCAACTGTTGAAAGCCAGAGTCACTAAAAATGCCAAACACAGAAGACAGCTTTGCTAATACCATTAAATACTTTATTCCATAAATATGTTTTTAAAAGCTTGTATGAACAAGGTATGGTGCTCACTGCTATACTTATAAAAGAGTAAGGTTATAATCACTTGTTGATATGAAAAGATTTCTGGTTGGAATCTGATTGAAACAGTGAGTTATTCACCACCCTCCATTCTCT'
# Complement table over the RNA alphabet (A<->U, C<->G); transcribe() looks up
# every base of the reversed transcript here to build the opposite strand.
base_pairing = {'A':'U', 'U':'A', 'C':'G','G':'C'}
def transcribe(seq):
    """Transcribe DNA to mRNA and split it into codons from the first start codon.

    Every ``T`` in *seq* is replaced by ``U``; reading then starts at the first
    ``AUG`` and proceeds in steps of three bases. A trailing fragment shorter
    than three bases is dropped.

    NOTE: a later ``def transcribe`` in this module rebinds the name, so code
    that runs after that definition does not reach this single-frame version.

    Args:
        seq: DNA sequence as a string of bases.

    Returns:
        List of three-letter mRNA codons beginning with ``'AUG'``, or an empty
        list when the transcript contains no start codon.
    """
    rna = seq.replace('T', 'U')
    start = rna.find('AUG')
    if start == -1:
        # str.find reports "not found" as -1; feeding that to range() would
        # silently start reading from an arbitrary offset instead of a start codon.
        return []
    # Stopping at len - 2 guarantees every slice is a complete codon.
    return [rna[i:i + 3] for i in range(start, len(rna) - 2, 3)]

#######################################################################################
# Codon table: three-letter mRNA codon -> three-letter amino-acid abbreviation.
# The three terminator codons map to the marker string 'Stop'.
# Rows are grouped by first base; within a group each row shares its third base.
#######################################################################################

translate = {
    # first base U
    "UUU": "Phe", "UCU": "Ser", "UAU": "Tyr", "UGU": "Cys",
    "UUC": "Phe", "UCC": "Ser", "UAC": "Tyr", "UGC": "Cys",
    "UUA": "Leu", "UCA": "Ser", "UAA": "Stop", "UGA": "Stop",
    "UUG": "Leu", "UCG": "Ser", "UAG": "Stop", "UGG": "Trp",
    # first base C
    "CUU": "Leu", "CCU": "Pro", "CAU": "His", "CGU": "Arg",
    "CUC": "Leu", "CCC": "Pro", "CAC": "His", "CGC": "Arg",
    "CUA": "Leu", "CCA": "Pro", "CAA": "Gln", "CGA": "Arg",
    "CUG": "Leu", "CCG": "Pro", "CAG": "Gln", "CGG": "Arg",
    # first base A
    "AUU": "Ile", "ACU": "Thr", "AAU": "Asn", "AGU": "Ser",
    "AUC": "Ile", "ACC": "Thr", "AAC": "Asn", "AGC": "Ser",
    "AUA": "Ile", "ACA": "Thr", "AAA": "Lys", "AGA": "Arg",
    "AUG": "Met", "ACG": "Thr", "AAG": "Lys", "AGG": "Arg",
    # first base G
    "GUU": "Val", "GCU": "Ala", "GAU": "Asp", "GGU": "Gly",
    "GUC": "Val", "GCC": "Ala", "GAC": "Asp", "GGC": "Gly",
    "GUA": "Val", "GCA": "Ala", "GAA": "Glu", "GGA": "Gly",
    "GUG": "Val", "GCG": "Ala", "GAG": "Glu", "GGG": "Gly",
}

#######################################################################################
# Amino-acid abbreviations: three-letter name -> one-letter code.
# Only the twenty amino acids are listed; there is no entry for 'Stop'.
#######################################################################################

AA_mapping = {
    "Ala": "A",
    "Arg": "R",
    "Asn": "N",
    "Asp": "D",
    "Cys": "C",
    "Gln": "Q",
    "Glu": "E",
    "Gly": "G",
    "His": "H",
    "Ile": "I",
    "Leu": "L",
    "Lys": "K",
    "Met": "M",
    "Phe": "F",
    "Pro": "P",
    "Ser": "S",
    "Thr": "T",
    "Trp": "W",
    "Tyr": "Y",
    "Val": "V",
}

def _split_codons(strand, offset):
    """Return the complete three-base codons of *strand*, reading from *offset*."""
    # Stopping at len - 2 drops a trailing fragment shorter than one codon.
    return [strand[i:i + 3] for i in range(offset, len(strand) - 2, 3)]


def transcribe(seq):
    """Transcribe DNA to RNA and split it into codons for all six reading frames.

    Every ``T`` in *seq* is replaced by ``U`` to form the forward transcript.
    The second strand is built by reversing that transcript and replacing each
    base with its partner from ``base_pairing``.

    Args:
        seq: DNA sequence as a string of bases.

    Returns:
        A list of six codon lists: forward strand at offsets 0, 1 and 2,
        followed by the reversed/complemented strand at offsets 0, 1 and 2.

    Raises:
        KeyError: if the transcript contains a character that is not a key of
            ``base_pairing``.
    """
    forward = seq.replace('T', 'U')
    # Joining once avoids rebuilding the string for every base.
    reverse = ''.join(base_pairing[base] for base in reversed(forward))

    return [
        _split_codons(strand, offset)
        for strand in (forward, reverse)
        for offset in range(3)
    ]
def find_complimentary_dna(dna_to_compliemnt):
    """Return the reverse complement of a DNA sequence.

    The input is read from its last base to its first and each base is swapped
    for its pairing partner (A<->T, G<->C), so the result is the opposite
    strand written in the DNA alphabet.

    Args:
        dna_to_compliemnt: DNA sequence made of the upper-case bases A, T, G, C.

    Returns:
        The reverse-complement string. Characters other than A, T, G and C are
        skipped, so the result may be shorter than the input.
    """
    pairing = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    return ''.join(
        pairing[base]
        for base in reversed(dna_to_compliemnt)
        if base in pairing  # unrecognised characters are dropped, not errors
    )

def get_protein_from_DNA(DNA):
    """Translate a DNA sequence in all six reading frames and mark its ORFs.

    Each frame returned by ``transcribe`` is translated codon by codon into
    one-letter amino-acid codes. A ``<`` is written before the ``AUG`` that
    opens a reading frame and a ``>`` where a stop codon closes it; a frame
    still open at the end of the sequence is closed with ``>`` as well. Stop
    codons themselves produce no letter. Frames containing at least one ORF
    are prefixed with ``*``.

    Args:
        DNA: DNA sequence as text. Whitespace is removed and letters are
            upper-cased before processing.

    Returns:
        A 3-tuple ``(sequence, complement, report)``: the normalised input
        sequence, the result of ``find_complimentary_dna`` for it, and a
        multi-line report with one line per reading frame.

    Raises:
        KeyError: if the sequence contains a base or codon missing from the
            lookup tables.
    """
    # The lookup tables only have upper-case keys and no whitespace, so
    # normalise what the text box delivers before translating.
    DNA = ''.join(DNA.split()).upper()
    reading_frames = transcribe(DNA)
    orf_list = []

    for codons in reading_frames:
        pieces = []
        in_orf = False   # currently between a start codon and its stop
        saw_orf = False  # at least one start codon was found in this frame
        for codon in codons:
            residue = translate[codon]
            if codon == 'AUG' and not in_orf:
                pieces.append('<')
                in_orf = True
                saw_orf = True
            if residue == 'Stop':
                if in_orf:
                    pieces.append('>')
                    in_orf = False
                continue
            pieces.append(AA_mapping[residue])

        # Close only an ORF that ran off the end of the sequence; one already
        # terminated by a stop codon must not receive a second '>'.
        if in_orf:
            pieces.append('>')

        one_letter = ''.join(pieces)
        if saw_orf:
            one_letter = '*' + one_letter
        orf_list.append(one_letter)

    frame_labels = (
        "5'to 3' Frame 1:",
        "5'to 3' Frame 2:",
        "5'to 3' Frame 3:",
        "3'to 5' Frame 1:",
        "3'to 5' Frame 2:",
        "3'to 5' Frame 3:",
    )
    r = " * means the ORF sequence exists and highlighted in as <>; " + ''.join(
        '\n' + label + orf_list[index] for index, label in enumerate(frame_labels)
    )
    return DNA, find_complimentary_dna(DNA), r

# Gradio UI components: one text box for the DNA input and three text boxes for
# the three values returned by get_protein_from_DNA, in the same order.
# NOTE(review): gr.inputs / gr.outputs and the `theme` / `layout` keywords below
# look like the legacy Gradio API — confirm against the Gradio version in use.
input1 = gr.inputs.Textbox(label = 'DNA sequence')
output1 = gr.outputs.Textbox(label = 'Primary Sequence')
output2 = gr.outputs.Textbox(label = 'Complimentary Sequence')
output3 = gr.outputs.Textbox(label = 'Open Reading Frames')

# Wire the translation function to the components above.
interface = gr.Interface(fn=get_protein_from_DNA,
                         inputs=input1,
                         outputs=[output1,output2,output3],
                         title="CSCI1020: Web Application for Transcription/Translation", 
                         description= "A quick tool for translating a DNA sequence into a protein sequence",
                         theme = 'huggingface',
                         layout = 'vertical'
                         )
# Starts the web app as soon as this module is executed.
interface.launch()