Spaces:

kipfriend
/

transcription_translation

Running

File size: 4,626 Bytes

import gradio as gr

# RNA complement table: each base maps to the base it pairs with.
base_pairing = {
    'A': 'U',
    'U': 'A',
    'C': 'G',
    'G': 'C',
}

def transcribe(seq):
  """Transcribe DNA to RNA and split it into codons from the first start codon.

  Every 'T' is replaced by 'U', then the sequence is cut into
  non-overlapping triplets beginning at the first 'AUG'.  A trailing
  fragment shorter than three bases is dropped.

  NOTE: a second ``def transcribe`` further down this file rebinds the
  name, so this version is not the one the rest of the module ends up
  calling.

  Args:
    seq: DNA sequence as a string.  Letter case and whitespace are ignored.

  Returns:
    A list of three-letter codon strings whose first element is 'AUG',
    or an empty list when the sequence contains no start codon.
  """
  # Drop whitespace (e.g. pasted line breaks) and fold case before matching.
  RNA_seq = ''.join(seq.split()).upper().replace('T', 'U')
  start_codon = RNA_seq.find('AUG')
  # str.find returns -1 when there is no match; using that as a range start
  # would slice from the end of the string and emit codons from an
  # arbitrary frame, so bail out explicitly instead.
  if start_codon == -1:
    return []
  # Stopping at len - 2 guarantees every slice is a complete triplet.
  return [RNA_seq[i:i + 3] for i in range(start_codon, len(RNA_seq) - 2, 3)]

####################################################################################### 
# Dictionary translating each three-letter mRNA codon to its three-letter amino-acid abbreviation
#######################################################################################

# Codon table laid out as a grid: each row fixes the first and third base,
# and the four columns vary the second base in the order U, C, A, G.
# The three stop codons (UAA, UAG, UGA) carry the marker value 'Stop'.
translate = {
    'UUU': 'Phe', 'UCU': 'Ser', 'UAU': 'Tyr', 'UGU': 'Cys',
    'UUC': 'Phe', 'UCC': 'Ser', 'UAC': 'Tyr', 'UGC': 'Cys',
    'UUA': 'Leu', 'UCA': 'Ser', 'UAA': 'Stop', 'UGA': 'Stop',
    'UUG': 'Leu', 'UCG': 'Ser', 'UAG': 'Stop', 'UGG': 'Trp',
    'CUU': 'Leu', 'CCU': 'Pro', 'CAU': 'His', 'CGU': 'Arg',
    'CUC': 'Leu', 'CCC': 'Pro', 'CAC': 'His', 'CGC': 'Arg',
    'CUA': 'Leu', 'CCA': 'Pro', 'CAA': 'Gln', 'CGA': 'Arg',
    'CUG': 'Leu', 'CCG': 'Pro', 'CAG': 'Gln', 'CGG': 'Arg',
    'AUU': 'Ile', 'ACU': 'Thr', 'AAU': 'Asn', 'AGU': 'Ser',
    'AUC': 'Ile', 'ACC': 'Thr', 'AAC': 'Asn', 'AGC': 'Ser',
    'AUA': 'Ile', 'ACA': 'Thr', 'AAA': 'Lys', 'AGA': 'Arg',
    'AUG': 'Met', 'ACG': 'Thr', 'AAG': 'Lys', 'AGG': 'Arg',
    'GUU': 'Val', 'GCU': 'Ala', 'GAU': 'Asp', 'GGU': 'Gly',
    'GUC': 'Val', 'GCC': 'Ala', 'GAC': 'Asp', 'GGC': 'Gly',
    'GUA': 'Val', 'GCA': 'Ala', 'GAA': 'Glu', 'GGA': 'Gly',
    'GUG': 'Val', 'GCG': 'Ala', 'GAG': 'Glu', 'GGG': 'Gly',
}

#######################################################################################
# Dictionary mapping each three-letter amino-acid abbreviation to its one-letter code
#######################################################################################

# One-letter codes for the 20 amino acids, keyed by three-letter
# abbreviation.  There is no 'Stop' entry, so stop codons have to be
# filtered out before this table is consulted.
AA_mapping = {
    'Ala': 'A', 'Arg': 'R', 'Asn': 'N', 'Asp': 'D',
    'Cys': 'C', 'Gln': 'Q', 'Glu': 'E', 'Gly': 'G',
    'His': 'H', 'Ile': 'I', 'Leu': 'L', 'Lys': 'K',
    'Met': 'M', 'Phe': 'F', 'Pro': 'P', 'Ser': 'S',
    'Thr': 'T', 'Trp': 'W', 'Tyr': 'Y', 'Val': 'V',
}

def _split_into_frames(rna):
  """Return the three codon lists read from *rna* at offsets 0, 1 and 2."""
  # Stopping at len - 2 keeps only complete triplets; a trailing fragment
  # of one or two bases is dropped.
  return [
      [rna[i:i + 3] for i in range(offset, len(rna) - 2, 3)]
      for offset in range(3)
  ]


def transcribe(seq):
  """Return the six reading frames of a DNA sequence as lists of RNA codons.

  The sequence is transcribed by replacing every 'T' with 'U'.  The first
  three frames read that RNA at offsets 0, 1 and 2; the last three read
  its reverse complement (built with ``base_pairing``) at the same offsets.
  Incomplete trailing triplets are dropped.

  Args:
    seq: DNA sequence as a string.  Letter case and whitespace (spaces,
      line breaks from pasted text) are ignored.

  Returns:
    A list of six lists of three-letter codon strings: the three forward
    frames followed by the three reverse-complement frames.

  Raises:
    KeyError: if, after normalisation, the sequence contains a character
      that is not a key of ``base_pairing`` (anything other than A, C, G,
      T or U).
  """
  # Normalise first: lowercase letters or embedded line breaks would
  # otherwise fail the base_pairing lookup below.
  RNA_seq = ''.join(seq.split()).upper().replace('T', 'U')

  # Reverse complement: walk the strand backwards and pair every base.
  # join() avoids the quadratic cost of repeated string concatenation.
  cDNA = ''.join(base_pairing[base] for base in reversed(RNA_seq))

  return _split_into_frames(RNA_seq) + _split_into_frames(cDNA)

def get_protein_from_DNA(DNA):
  """Translate the reading frames of a DNA sequence into annotated proteins.

  Each frame returned by ``transcribe`` is translated codon by codon into
  one-letter amino-acid codes.  Stop codons emit no residue.  An 'AUG'
  met outside an open reading frame (ORF) opens one and is preceded by
  '<'; the next stop codon closes it with '>'.  A frame containing at
  least one start codon is prefixed with '*'.  An ORF that is still open
  when the frame ends is closed with a final '>' so the markers always
  come in balanced pairs.

  Args:
    DNA: DNA sequence as a string, passed unchanged to ``transcribe``.

  Returns:
    A multi-line string: a legend line followed by one labelled line for
    each of the six reading frames.

  Raises:
    KeyError: if a codon is missing from the ``translate`` table.
  """
  stop_codons = ('UAA', 'UGA', 'UAG')
  orf_list = []

  for mRNA in transcribe(DNA):
    pieces = []
    in_orf = False   # True while a '<' has been emitted without its '>'
    has_orf = False  # True once any start codon has opened an ORF
    for codon in mRNA:
      if codon in stop_codons:
        if in_orf:
          pieces.append('>')
          in_orf = False
        continue
      if codon == 'AUG' and not in_orf:
        pieces.append('<')
        in_orf = True
        has_orf = True
      pieces.append(AA_mapping[translate[codon]])

    one_letter = ''.join(pieces)
    if has_orf:
      # Flag the frame, and close the last ORF only if no stop codon
      # already did: an unconditional '>' here would leave a stray,
      # unmatched marker after an ORF that was properly terminated.
      one_letter = '*' + one_letter + ('>' if in_orf else '')
    orf_list.append(one_letter)

  legend = " * means the ORF sequence exists and highlighted in as <>; "
  labels = (
      "5'to 3' Frame 1:", "5'to 3' Frame 2:", "5'to 3' Frame 3:",
      "3'to 5' Frame 1:", "3'to 5' Frame 2:", "3'to 5' Frame 3:",
  )
  r = legend + ''.join('\n' + label + orf for label, orf in zip(labels, orf_list))
  return r


#results = get_protein_from_DNA(DNA)

#def highlight_text(string_in):
  #string_in=string_in.replace('<','\033[1m\033[94m')
  #string_in=string_in.replace('>','\033[0m')
  #print(string_in)

#highlight_text(results)


## Configure the input and output widgets: one text box in, one text box out.
# NOTE(review): `gr.inputs` / `gr.outputs` look like the legacy Gradio
# component namespaces -- confirm they exist in the Gradio version this
# app is deployed with.

input_module = gr.inputs.Textbox(label = "Enter your DNA sequence in the box:")
output_module = gr.outputs.Textbox(label = 'Protein sequences from ORFs 1-6:')

### Build the Gradio interface around get_protein_from_DNA; details can be
### found at https://www.gradio.app/docs/#i_slider
# NOTE(review): the description tells users to click examples, but no
# `examples=` argument is passed here -- either add examples or reword.
# NOTE(review): `theme` and `layout` may not be accepted by newer Gradio
# releases -- verify against the installed version.
interface = gr.Interface(fn=get_protein_from_DNA, 
                         inputs=input_module, 
                         outputs=output_module,
                         title="CSCI1020 Demo 2: Web Application for DNA Transcription & RNA Translation", 
                         description= "Click examples below for a quick demo",
                         theme = 'huggingface',
                         layout = 'vertical'
                         )
# Called at module level (no __main__ guard), so it runs whenever this file
# is executed or imported.
interface.launch()