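# Spaces: Running
# (Non-code header text captured together with this file; kept as a comment so the module can be parsed.)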
# Fixed sequence segments that run_N / run_C concatenate around the target
# protein when assembling constructs.

# Reporter segment, inserted only in the "con_gfp_*" constructs
# (presumably a GFP variant, judging by those construct names -- confirm).
G = 'MVSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLTYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK'

# Spacer always placed directly next to G, between G and the rest of the
# construct.  NOTE(review): 'XXXXXXX' looks like a placeholder -- confirm the
# intended linker sequence.
L2 = 'XXXXXXX'

# Segment always placed directly next to the target protein
# (contains ENLYFQS; presumably a protease cleavage site -- confirm).
T = 'LEENLYFQS'

# Gly/Ser-only spacer placed between T and a solubility tag.
L1 = 'GGSGGGSGGGSGGGS'

# Six histidines; always the outermost segment on the end opposite to the
# target protein.
H = 'HHHHHH'
# Fusion / affinity tag sequences keyed by the tag name callers pass in
# ``solu_tag``.  Each key appears exactly once: a repeated key in a dict
# literal is silently overwritten by its last occurrence, so duplicates only
# hide editing mistakes.
soluTag = {
    'MBP': 'MKIKTGARILALSALTTMMFSASALAKIEEGKLVIWINGDKGYNGLAEVGKKFEKDTGIKVTVEHPDKLEEKFPQVAATGDGPDIIFWAHDRFGGYAQSGLLAEITPDKAFQDKLYPFTWDAVRYNGKLIAYPIAVEALSLIYNKDLLPNPPKTWEEIPALDKELKAKGKSALMFNLQEPYFTWPLIAADGGYAFKYENGKYDIKDVGVDNAGAKAGLTFLVDLIKNKHMNADTDYSIAEAAFNKGETAMTINGPWAWSNIDTSKVNYGVTVLPTFKGQPSKPFVGVLSAGINAASPNKELAKEFLENYLLTDEGLEAVNKDKPLGAVALKSYEEELAKDPRIAATMENAQKGEIMPNIPQMSAFWYAVRTAVINAASGRQTVDEALKDAQTRITK',
    'SUMO': 'MSDQEAKPSTEDLGDKKEGEYIKLKVIGQDSSEIHFKVKMTTHLKKLKESYCQRQGVPMNSLRFLFEGQRIADNHTPKELGMEEEDVIEVYQEQTGGHSTV',
    'Fh8': 'MPSVQEVEKLLHVLDRNGDGKVSAEELKAFADDSKCPLDSNKIKAFIKEHDKNKDGKLDLKELVSILSS',
    'GST': 'MKLFYKPGACSLASHITLRESGKDFTLVSVDLMKKRLENGDDYFAVNPKGQVPALLLDDGTLLTEGVAIMQYLADSVPDRQLLAPVNSISRYKTIEWLNYIATELHKGFTPLFRPDTPEEYKPTVRAQLEKKLQYVNEALKDEHWICGQRFTIADAYLFTVLRWAYAVKLNLEGLEHIAAFMQRMAERPEVQDALSAEGLK',
    'NusA': 'MNKEILAVVEAVSNEKALPREKIFEALESALATATKKKYEQEIDVRVQIDRKSGDFDTFRRWLVVDEVTQPTKEITLEAARYEDESLNLGDYVEDQIESVTFDRITTQTAKQVIVQKVREAERAMVVDQFREHEGEIITGVVKKVNRDNISLDLGNNAEAVILREDMLPRENFRPGDRVRGVLYSVRPEARGAQLFVTRSKPEMLIELFRIEVPEIGEEVIEIKAAARDPGSRAKIAVKTNDKRIDPVGACVGMRGARVQAVSTELGGERIDIVLWDDNPAQFVINAMAPADVASIVVDEDKHTMDIAVEAGNLAQAIGRNGQNVRLASQLSGWELNVMTDDLQAKHQAEAHAAIDTFTKYLDIDEDFATVLVEEGFSTLEELAYVPMKELLEIEGLDEPTVEALRERAKNALATIAQAQEESLGDNKPADDLLNLEGVDRDLAFKLAARGVCTLEDLAEQGIDDLADIEGLTDEKAGALIMAARNICWFGDEA',
    'Thioredoxin': 'SDKIIHLTDDSFDTDVLKADGAILVDFWAEWCGPCKMIAPILDEIADEYQGKLTVAKLNIDQNPGTAPKYGIRGIPTLLLFKNGEVAATKVGALSKGQLKEFLDANLA',
    'FLAG': 'DYKDDDDK',
    'S-tag': 'KETAAAKFERQHMDS',
    'CBP': 'KRRWKKNFIAVSAANRFKKISSSGAL',
    'STREPII': 'WSHPQFEK',
    'BAP': 'GLNDIFEAQKIEWHE',
    'NT11': 'VSEPHDYNYEK',
    'Tab2': 'VVSHFND',
    'Z-basic': 'VDNKFNKEQQNAFYEILHLPNLNEEQRNAFIQSLKDDPSQSANLLAEAKKLNDAQPK',
    'ProteinA': 'MKKKNIYSIRKLGVGIASVTLGTLLISGGVTPAANAAQHDEAQQNAFYQVLNMPNLNADQRNGFIQSLKDDPSQSANVLGEAQKLNDSQAPKADAQQNNFNKDQQSAFYEILNMPNLNEAQRNGFIQSLKDDPSQSTNVLGEAKKLNESQAPKADNNFNKEQQNAFYEILNMPNLNEEQRNGFIQSLKDDPSQSANLLSEAKKLNESQAPKADNKFNKEQQNAFYEILHLPNLNEEQRNGFIQSLKDDPSQSANLLAEAKKLNDAQAPKADNKFNKEQQNAFYEILHLPNLTEEQRNGFIQSLKDDPSVSKEILAEAKKLNDAQAPKEEDNNKPGKEDNNKPGKEDNNKPGKEDNNKPGKEDNNKPGKEDGNKPGKEDNKKPGKEDGNKPGKEDNKKPGKEDGNKPGKEDGNKPGKEDGNGVHVVKPGDTVNDIAKANGADKIAADNKLADKNMIKPGQELVVDKKQPANHADANKAQALPETGEENPFIGTTVFGGLSLALGAALLEL',
    'IMPACT': 'TNPGVSAWQVNTAYTAGQLVTYNGKTYKCLQPHTSLAGWEPSNVPALWQLQ',
    'mysB': 'MTMYATLEEAIDAAREEFLADNPGIDAEDANVQQFNAQKYVLQDGDIMWQVEFFADEGEEGECLPMLSGEAAQSVFDGDYDEIEIRQEWQEENTLHEWDEGEFQLEPPLDTEEGRAAADEWDER',
    'PolyR': 'RRRRR',
    'c-myc': 'EQKLISEEDL',
    # NOTE(review): 'S' carries the same sequence as 'S-tag'; both keys are
    # kept because callers may use either name.
    'S': 'KETAAAKFERQHMDS',
    'SBP-tag': 'MDEKTTGWRGGHVVEGLAGELEQLRARLEHHPQGQREP',
    'Strep-tag': 'WRHPQFGG',
    'Twin-Strep-tag': 'SAWSHPQFEKGGGSGGGSGGSAWSHPQFEK',
    'HAT': 'KDHLIHNVHKEFHAHAHNK',
    'BCCP': 'MDIRKIKKLIELVEESGISELEISEGEESVRISRAAPAASFPVMQQAYAAPMMQQPAQSNAAAPATVPSMEAPAAAEISGHIVRSPMVGTFYRTPSPDAKAFIEVGQKVNVDTLCIVEAMKMMNQIEADKSGTVKAILVESGQPVEFDEPLVVIE',
    'HaloTag': 'SGSAEIGTGFPFDPHYVEVLGERMHYVDVGPRDGTPVLFLHGNPTSSYVWRNIIPHVAPTHRCIAPDLIGMGKSDKPDLGYFFDDHVRFMDAFIEALGLEEVVLVIHDWGSALGFHWAKRNPERVKGIAFMEFIRPIPTWDEWPEFARETFQAFRTTDVGRKLIIDQNVFIEGTLPCGVVRPLTEVEMDHYREPFLNPVDREPLWRFPNELPIAGEPANIVALVEEYMDWLHQSPVPKLLFWGTPGVLIPPAEAARLAKSLPNCKAVDIGPGLNLLQEDNPDLIGSEIARWLSTLEISG',
}
def run_N(P: str, gfp: bool, solu_tag: list) -> dict:
    """Assemble the constructs that start with the target protein ``P``.

    Every construct begins with ``P + T`` and ends with ``H``.  When ``gfp``
    is truthy, ``L2 + G`` is inserted immediately before ``H``.  For each
    requested tag, ``L1`` followed by the tag sequence is inserted right
    after ``T``.

    Args:
        P: Sequence of the target protein.
        gfp: Whether to include the ``L2 + G`` segment.
        solu_tag: Keys of ``soluTag``.  One construct is produced per key;
            when empty, a single tag-less construct is produced instead.

    Returns:
        Mapping of construct name to assembled sequence.  Names are
        ``con_gfp_N_<tag>`` / ``con_gfp_N`` when ``gfp`` is truthy and
        ``con_N_<tag>`` / ``con_N`` otherwise.

    Raises:
        KeyError: If an entry of ``solu_tag`` is not a key of ``soluTag``.
    """
    prefix = "con_gfp_N" if gfp else "con_N"
    # Optional segment that sits just before the trailing H.
    reporter = L2 + G if gfp else ""

    if not solu_tag:
        # TODO: verify the tag-less, GFP-less layout (P + T + H).
        return {prefix: P + T + reporter + H}

    return {
        f"{prefix}_{tag}": P + T + L1 + soluTag[tag] + reporter + H
        for tag in solu_tag
    }
def run_C(P: str, gfp: bool, solu_tag: list) -> dict:
    """Assemble the constructs that end with the target protein ``P``.

    Every construct begins with ``H`` and ends with ``T + P``.  When ``gfp``
    is truthy, ``G + L2`` is inserted immediately after ``H``.  For each
    requested tag, the tag sequence followed by ``L1`` is inserted right
    before ``T``.

    Args:
        P: Sequence of the target protein.
        gfp: Whether to include the ``G + L2`` segment.
        solu_tag: Keys of ``soluTag``.  One construct is produced per key;
            when empty, a single tag-less construct is produced instead.

    Returns:
        Mapping of construct name to assembled sequence.  Names are
        ``con_gfp_C_<tag>`` / ``con_gfp_C`` when ``gfp`` is truthy and
        ``con_C_<tag>`` / ``con_C`` otherwise.

    Raises:
        KeyError: If an entry of ``solu_tag`` is not a key of ``soluTag``.
    """
    prefix = "con_gfp_C" if gfp else "con_C"
    # Optional segment that sits just after the leading H.
    reporter = G + L2 if gfp else ""

    if not solu_tag:
        # TODO: verify the tag-less, GFP-less layout (H + T + P).
        return {prefix: H + reporter + T + P}

    return {
        f"{prefix}_{tag}": H + reporter + soluTag[tag] + L1 + T + P
        for tag in solu_tag
    }
def build(P: str, solu_tag: list, GFP: bool, P_pos: str) -> dict:
    """Build the constructs for Protera's lab for one target protein.

    The result depends on:
      - the position of the target protein (first or last in the construct),
      - whether the GFP segment is included,
      - which fusion tags are requested.

    Usage:
        build(amino_sequence, solu_tag, GFP, P_pos)

    Args:
        P: Amino-acid sequence of the target protein.
        solu_tag: Names of the fusion tags to use (keys of ``soluTag``).
            If it contains ``"all"``, every available tag is used.  If it is
            empty, tag-less constructs are produced.
        GFP: ``True`` to include the GFP segment, ``False`` to omit it.
        P_pos: ``"N"``, ``"C"`` or ``"N+C"``.  ``run_N`` is used when the
            string contains ``"N"`` and ``run_C`` when it contains ``"C"``.
            A value containing neither letter yields an empty result.

    Available tags:
        'MBP', 'SUMO', 'Fh8', 'GST', 'NusA', 'Thioredoxin', 'FLAG', 'S-tag',
        'CBP', 'STREPII', 'BAP', 'NT11', 'Tab2', 'Z-basic', 'ProteinA',
        'IMPACT', 'mysB', 'PolyR', 'c-myc', 'S', 'SBP-tag', 'Strep-tag',
        'Twin-Strep-tag', 'HAT', 'BCCP', 'HaloTag'

    Returns:
        Mapping of construct name to assembled sequence, with the ``run_N``
        constructs first, followed by the ``run_C`` constructs.

    Raises:
        KeyError: If a requested tag is not a key of ``soluTag``.
    """
    if "all" in solu_tag:
        solu_tag = list(soluTag)

    constructs = {}
    if "N" in P_pos:
        constructs.update(run_N(P, GFP, solu_tag))
    if "C" in P_pos:
        constructs.update(run_C(P, GFP, solu_tag))
    return constructs