Spaces:
Sleeping
Sleeping
File size: 6,096 Bytes
4dad5f5 821ebf7 4dad5f5 821ebf7 4dad5f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import gradio as gr
# Sample DNA sequence (bases A, C, G, T) kept as a module-level constant.
# NOTE(review): nothing in the code shown here reads this constant;
# get_protein_from_DNA takes its own ``DNA`` parameter, which shadows it.
DNA = 'GGTCAGAAAAAGCCCTCTCCATGTCTACTCACGATACATCCCTGAAAACCACTGAGGAAGTGGCTTTTCAGATCATCTTGCTTTGCCAGTTTGGGGTTGGGACTTTTGCCAATGTATTTCTCTTTGTCTATAATTTCTCTCCAATCTCGACTGGTTCTAAACAGAGGCCCAGACAAGTGATTTTAAGACACATGGCTGTGGCCAATGCCTTAACTCTCTTCCTCACTATATTTCCAAACAACATGATGACTTTTGCTCCAATTATTCCTCAAACTGACCTCAAATGTAAATTAGAATTCTTCACTCGCCTCGTGGCAAGAAGCACAAACTTGTGTTCAACTTGTGTTCTGAGTATCCATCAGTTTGTCACACTTGTTCCTGTTAATTCAGGTAAAGGAATACTCAGAGCAAGTGTCACAAACATGGCAAGTTATTCTTGTTACAGTTGTTGGTTCTTCAGTGTCTTAAATAACATCTACATTCCAATTAAGGTCACTGGTCCACAGTTAACAGACAATAACAATAACTCTAAAAGCAAGTTGTTCTGTTCCACTTCTGATTTCAGTGTAGGCATTGTCTTCTTGAGGTTTGCCCATGATGCCACATTCATGAGCATCATGGTCTGGACCAGTGTCTCCATGGTACTTCTCCTCCATAGACATTGTCAGAGAATGCAGTACATATTCACTCTCAATCAGGACCCCAGGGGCCAAGCAGAGACCACAGCAACCCATACTATCCTGATGCTGGTAGTCACATTTGTTGGCTTTTATCTTCTAAGTCTTATTTGTATCATCTTTTACACCTATTTTATATATTCTCATCATTCCCTGAGGCATTGCAATGACATTTTGGTTTCGGGTTTCCCTACAATTTCTCCTTTACTGTTGACCTTCAGAGACCCTAAGGGTCCTTGTTCTGTGTTCTTCAACTGTTGAAAGCCAGAGTCACTAAAAATGCCAAACACAGAAGACAGCTTTGCTAATACCATTAAATACTTTATTCCATAAATATGTTTTTAAAAGCTTGTATGAACAAGGTATGGTGCTCACTGCTATACTTATAAAAGAGTAAGGTTATAATCACTTGTTGATATGAAAAGATTTCTGGTTGGAATCTGATTGAAACAGTGAGTTATTCACCACCCTCCATTCTCT'
# RNA base -> complementary RNA base (A pairs with U, C pairs with G).
base_pairing = dict(zip('AUCG', 'UAGC'))
def transcribe(seq):
    """Transcribe DNA to RNA and split it into codons from the first start codon.

    Every 'T' in *seq* is replaced by 'U'.  The result is read three bases at
    a time beginning at the first 'AUG'; a trailing fragment shorter than
    three bases is discarded.

    Args:
        seq: DNA sequence as a string.

    Returns:
        A list of three-letter codon strings, or an empty list when the
        sequence contains no 'AUG'.

    NOTE: a second ``transcribe`` defined later in this file rebinds the
    name, so that later definition is the one other functions call.
    """
    rna = seq.replace('T', 'U')
    start = rna.find('AUG')
    if start == -1:
        # str.find reports "not found" as -1; feeding that to range() would
        # silently read codons from the wrong offset instead of finding none.
        return []
    # Stopping at len - 2 keeps only complete three-base codons.
    return [rna[i:i + 3] for i in range(start, len(rna) - 2, 3)]
#######################################################################################
# Codon table: three-letter mRNA codon -> three-letter amino acid name
# ('Stop' for the three stop codons).
#######################################################################################
_BASES = 'UCAG'
# One entry per (first base, second base) pair; the four names in each tuple
# correspond to the third base in the order U, C, A, G.
_CODON_BOXES = {
    'UU': ('Phe', 'Phe', 'Leu', 'Leu'),
    'UC': ('Ser', 'Ser', 'Ser', 'Ser'),
    'UA': ('Tyr', 'Tyr', 'Stop', 'Stop'),
    'UG': ('Cys', 'Cys', 'Stop', 'Trp'),
    'CU': ('Leu', 'Leu', 'Leu', 'Leu'),
    'CC': ('Pro', 'Pro', 'Pro', 'Pro'),
    'CA': ('His', 'His', 'Gln', 'Gln'),
    'CG': ('Arg', 'Arg', 'Arg', 'Arg'),
    'AU': ('Ile', 'Ile', 'Ile', 'Met'),
    'AC': ('Thr', 'Thr', 'Thr', 'Thr'),
    'AA': ('Asn', 'Asn', 'Lys', 'Lys'),
    'AG': ('Ser', 'Ser', 'Arg', 'Arg'),
    'GU': ('Val', 'Val', 'Val', 'Val'),
    'GC': ('Ala', 'Ala', 'Ala', 'Ala'),
    'GA': ('Asp', 'Asp', 'Glu', 'Glu'),
    'GG': ('Gly', 'Gly', 'Gly', 'Gly'),
}
# Loop order (first base, then third, then second) reproduces the row-by-row
# layout of the classic codon chart.
translate = {
    first + second + third: _CODON_BOXES[first + second][row]
    for first in _BASES
    for row, third in enumerate(_BASES)
    for second in _BASES
}
#######################################################################################
# Amino acid table: three-letter amino acid name -> one-letter code.
# There is deliberately no entry for 'Stop'.
#######################################################################################
AA_mapping = dict(zip(
    ('Ala Arg Asn Asp Cys Gln Glu Gly His Ile '
     'Leu Lys Met Phe Pro Ser Thr Trp Tyr Val').split(),
    'ARNDCQEGHILKMFPSTWYV',
))
def _split_codons(rna, offset):
    """Return the complete three-base codons of *rna* read from *offset*."""
    # Stopping at len - 2 drops a trailing fragment shorter than three bases.
    return [rna[i:i + 3] for i in range(offset, len(rna) - 2, 3)]


def transcribe(seq):
    """Return the six reading frames of a DNA sequence as lists of RNA codons.

    The sequence is upper-cased, stripped of whitespace and transcribed
    ('T' -> 'U').  Frames 0-2 read that RNA starting at offsets 0, 1 and 2.
    Frames 3-5 read its reverse complement (built with ``base_pairing``) at
    the same three offsets.

    Args:
        seq: DNA sequence as a string; case and whitespace are ignored.

    Returns:
        A list of six lists of three-letter codon strings: the three forward
        frames followed by the three reverse-strand frames.

    Raises:
        KeyError: if the sequence contains a character that is not a key of
            ``base_pairing`` after transcription.
    """
    # Pasted sequences are often lower-case or wrapped over several lines;
    # normalise them instead of failing on the base_pairing lookup.
    rna = ''.join(seq.split()).upper().replace('T', 'U')
    reverse_complement = ''.join(base_pairing[base] for base in reversed(rna))
    return [
        _split_codons(strand, offset)
        for strand in (rna, reverse_complement)
        for offset in range(3)
    ]
def find_complimentary_dna(dna_to_compliemnt):
    """Return the reverse complement of a DNA sequence.

    The input is read from its last base to its first and each base is
    replaced by its pairing partner (A<->T, G<->C), giving the opposite
    strand written in its own 5' to 3' direction.

    Args:
        dna_to_compliemnt: DNA sequence as a string; letter case is ignored.

    Returns:
        The reverse-complement string in upper case.  Characters other than
        A, T, G and C are left out of the result.
    """
    partner = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}
    # dict.get with '' drops unrecognised characters rather than raising.
    return ''.join(
        partner.get(base, '') for base in reversed(dna_to_compliemnt.upper())
    )
def _annotate_frame(codons):
    """Translate one reading frame to one-letter residues with ORF markers."""
    pieces = []
    in_orf = False
    for codon in codons:
        if codon == 'AUG' and not in_orf:
            pieces.append('<')
            in_orf = True
        if codon in ('UAA', 'UGA', 'UAG'):
            if in_orf:
                pieces.append('>')
                in_orf = False
            # Stop codons never emit a residue (AA_mapping has no 'Stop' key).
            continue
        pieces.append(AA_mapping[translate[codon]])
    if in_orf:
        # Close an ORF that ran to the end of the frame without a stop codon.
        # An ORF already closed by a stop codon must not get a second '>'.
        pieces.append('>')
    annotated = ''.join(pieces)
    # 'M' is only produced by AUG, so its presence means an ORF was opened.
    return '*' + annotated if 'M' in annotated else annotated


def get_protein_from_DNA(DNA):
    """Translate all six reading frames of a DNA sequence and mark the ORFs.

    Each frame returned by ``transcribe`` is translated to one-letter amino
    acid codes.  An open reading frame starts with '<' at an AUG and ends
    with '>' at the next stop codon, or at the end of the frame if no stop
    codon follows.  Frames that contain at least one ORF are prefixed
    with '*'.

    Args:
        DNA: DNA sequence as a string.

    Returns:
        A 3-tuple ``(DNA, find_complimentary_dna(DNA), report)`` where
        ``report`` is a legend line followed by one labelled line per frame.
    """
    frames = [_annotate_frame(codons) for codons in transcribe(DNA)]
    labels = [
        f"{direction} Frame {number}:"
        for direction in ("5'to 3'", "3'to 5'")
        for number in (1, 2, 3)
    ]
    legend = " * means the ORF sequence exists and highlighted in as <>; "
    r = legend + ''.join(
        f"\n{label}{frame}" for label, frame in zip(labels, frames)
    )
    return DNA, find_complimentary_dna(DNA), r
# Gradio UI: one text box for the DNA sequence and three text boxes that
# receive, in order, the three values returned by get_protein_from_DNA.
input1 = gr.inputs.Textbox(label = 'DNA sequence')
output1 = gr.outputs.Textbox(label = 'Primary Sequence')
output2 = gr.outputs.Textbox(label = 'Complimentary Sequence')
output3 = gr.outputs.Textbox(label = 'Open Reading Frames')
interface = gr.Interface(fn=get_protein_from_DNA,
                         inputs=input1,
                         outputs=[output1,output2,output3],
                         title="CSCI1020: Web Application for Transcription/Translation",
                         description= "A quick tool for translating a DNA sequence into a protein sequence",
                         theme = 'huggingface',
                         layout = 'vertical'
                         )
# Start the app as soon as this module is executed.
interface.launch()