"""Assemble fusion-protein construct sequences from a target sequence.

All sequences are plain strings of one-letter amino-acid codes; constructs are
produced by simple string concatenation of the building blocks defined below.
"""

# Sequence inserted only when the ``gfp`` flag is set (presumably GFP -- confirm).
G = 'MVSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLTYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK'
# Segment placed next to G in the GFP constructs.
# NOTE(review): 'XXXXXXX' looks like a placeholder linker -- confirm the real sequence.
L2 = 'XXXXXXX'
# Segment placed directly next to the target sequence P in every construct
# (presumably a protease cleavage site -- confirm).
T = 'LEENLYFQS'
# Gly/Ser segment placed between T and the fusion tag.
L1 = 'GGSGGGSGGGSGGGS'
# Six histidines, always at the end opposite to the target sequence.
H = 'HHHHHH'

# Fusion tags by name.  Each key appears exactly once; insertion order is the
# order used when the caller asks for "all" tags.
soluTag = {
    'MBP': 'MKIKTGARILALSALTTMMFSASALAKIEEGKLVIWINGDKGYNGLAEVGKKFEKDTGIKVTVEHPDKLEEKFPQVAATGDGPDIIFWAHDRFGGYAQSGLLAEITPDKAFQDKLYPFTWDAVRYNGKLIAYPIAVEALSLIYNKDLLPNPPKTWEEIPALDKELKAKGKSALMFNLQEPYFTWPLIAADGGYAFKYENGKYDIKDVGVDNAGAKAGLTFLVDLIKNKHMNADTDYSIAEAAFNKGETAMTINGPWAWSNIDTSKVNYGVTVLPTFKGQPSKPFVGVLSAGINAASPNKELAKEFLENYLLTDEGLEAVNKDKPLGAVALKSYEEELAKDPRIAATMENAQKGEIMPNIPQMSAFWYAVRTAVINAASGRQTVDEALKDAQTRITK',
    'SUMO': 'MSDQEAKPSTEDLGDKKEGEYIKLKVIGQDSSEIHFKVKMTTHLKKLKESYCQRQGVPMNSLRFLFEGQRIADNHTPKELGMEEEDVIEVYQEQTGGHSTV',
    'Fh8': 'MPSVQEVEKLLHVLDRNGDGKVSAEELKAFADDSKCPLDSNKIKAFIKEHDKNKDGKLDLKELVSILSS',
    'GST': 'MKLFYKPGACSLASHITLRESGKDFTLVSVDLMKKRLENGDDYFAVNPKGQVPALLLDDGTLLTEGVAIMQYLADSVPDRQLLAPVNSISRYKTIEWLNYIATELHKGFTPLFRPDTPEEYKPTVRAQLEKKLQYVNEALKDEHWICGQRFTIADAYLFTVLRWAYAVKLNLEGLEHIAAFMQRMAERPEVQDALSAEGLK',
    'NusA': 'MNKEILAVVEAVSNEKALPREKIFEALESALATATKKKYEQEIDVRVQIDRKSGDFDTFRRWLVVDEVTQPTKEITLEAARYEDESLNLGDYVEDQIESVTFDRITTQTAKQVIVQKVREAERAMVVDQFREHEGEIITGVVKKVNRDNISLDLGNNAEAVILREDMLPRENFRPGDRVRGVLYSVRPEARGAQLFVTRSKPEMLIELFRIEVPEIGEEVIEIKAAARDPGSRAKIAVKTNDKRIDPVGACVGMRGARVQAVSTELGGERIDIVLWDDNPAQFVINAMAPADVASIVVDEDKHTMDIAVEAGNLAQAIGRNGQNVRLASQLSGWELNVMTDDLQAKHQAEAHAAIDTFTKYLDIDEDFATVLVEEGFSTLEELAYVPMKELLEIEGLDEPTVEALRERAKNALATIAQAQEESLGDNKPADDLLNLEGVDRDLAFKLAARGVCTLEDLAEQGIDDLADIEGLTDEKAGALIMAARNICWFGDEA',
    'Thioredoxin': 'SDKIIHLTDDSFDTDVLKADGAILVDFWAEWCGPCKMIAPILDEIADEYQGKLTVAKLNIDQNPGTAPKYGIRGIPTLLLFKNGEVAATKVGALSKGQLKEFLDANLA',
    'FLAG': 'DYKDDDDK',
    'S-tag': 'KETAAAKFERQHMDS',
    'CBP': 'KRRWKKNFIAVSAANRFKKISSSGAL',
    'STREPII': 'WSHPQFEK',
    'BAP': 'GLNDIFEAQKIEWHE',
    'NT11': 'VSEPHDYNYEK',
    'Tab2': 'VVSHFND',
    'Z-basic': 'VDNKFNKEQQNAFYEILHLPNLNEEQRNAFIQSLKDDPSQSANLLAEAKKLNDAQPK',
    'ProteinA': 'MKKKNIYSIRKLGVGIASVTLGTLLISGGVTPAANAAQHDEAQQNAFYQVLNMPNLNADQRNGFIQSLKDDPSQSANVLGEAQKLNDSQAPKADAQQNNFNKDQQSAFYEILNMPNLNEAQRNGFIQSLKDDPSQSTNVLGEAKKLNESQAPKADNNFNKEQQNAFYEILNMPNLNEEQRNGFIQSLKDDPSQSANLLSEAKKLNESQAPKADNKFNKEQQNAFYEILHLPNLNEEQRNGFIQSLKDDPSQSANLLAEAKKLNDAQAPKADNKFNKEQQNAFYEILHLPNLTEEQRNGFIQSLKDDPSVSKEILAEAKKLNDAQAPKEEDNNKPGKEDNNKPGKEDNNKPGKEDNNKPGKEDNNKPGKEDGNKPGKEDNKKPGKEDGNKPGKEDNKKPGKEDGNKPGKEDGNKPGKEDGNGVHVVKPGDTVNDIAKANGADKIAADNKLADKNMIKPGQELVVDKKQPANHADANKAQALPETGEENPFIGTTVFGGLSLALGAALLEL',
    'IMPACT': 'TNPGVSAWQVNTAYTAGQLVTYNGKTYKCLQPHTSLAGWEPSNVPALWQLQ',
    'mysB': 'MTMYATLEEAIDAAREEFLADNPGIDAEDANVQQFNAQKYVLQDGDIMWQVEFFADEGEEGECLPMLSGEAAQSVFDGDYDEIEIRQEWQEENTLHEWDEGEFQLEPPLDTEEGRAAADEWDER',
    'PolyR': 'RRRRR',
    'c-myc': 'EQKLISEEDL',
    # Same sequence as 'S-tag'; kept as a separate key because both names are advertised.
    'S': 'KETAAAKFERQHMDS',
    'SBP-tag': 'MDEKTTGWRGGHVVEGLAGELEQLRARLEHHPQGQREP',
    'Strep-tag': 'WRHPQFGG',
    'Twin-Strep-tag': 'SAWSHPQFEKGGGSGGGSGGSAWSHPQFEK',
    'HAT': 'KDHLIHNVHKEFHAHAHNK',
    'BCCP': 'MDIRKIKKLIELVEESGISELEISEGEESVRISRAAPAASFPVMQQAYAAPMMQQPAQSNAAAPATVPSMEAPAAAEISGHIVRSPMVGTFYRTPSPDAKAFIEVGQKVNVDTLCIVEAMKMMNQIEADKSGTVKAILVESGQPVEFDEPLVVIE',
    'HaloTag': 'SGSAEIGTGFPFDPHYVEVLGERMHYVDVGPRDGTPVLFLHGNPTSSYVWRNIIPHVAPTHRCIAPDLIGMGKSDKPDLGYFFDDHVRFMDAFIEALGLEEVVLVIHDWGSALGFHWAKRNPERVKGIAFMEFIRPIPTWDEWPEFARETFQAFRTTDVGRKLIIDQNVFIEGTLPCGVVRPLTEVEMDHYREPFLNPVDREPLWRFPNELPIAGEPANIVALVEEYMDWLHQSPVPKLLFWGTPGVLIPPAEAARLAKSLPNCKAVDIGPGLNLLQEDNPDLIGSEIARWLSTLEISG',
}


def run_N(P: str, gfp: bool, solu_tag: list) -> dict:
    """Build the constructs that start with the target sequence ``P``.

    Args:
        P: Target amino-acid sequence; it is the first segment of every
            construct returned here.
        gfp: When true, ``L2 + G`` is inserted just before the trailing ``H``.
        solu_tag: Names of fusion tags (keys of ``soluTag``).  One construct
            is produced per name; an empty list yields a single tag-less
            construct.

    Returns:
        A dict mapping construct name to sequence.  Names are
        ``con_gfp_N_<tag>`` / ``con_gfp_N`` with GFP and ``con_N_<tag>`` /
        ``con_N`` without.

    Raises:
        KeyError: If a name in ``solu_tag`` is not a key of ``soluTag``.
    """
    const = {}
    if gfp:
        for f in solu_tag:
            const[f"con_gfp_N_{f}"] = P + T + L1 + soluTag[f] + L2 + G + H
        if not solu_tag:
            # No tag requested: L1 is dropped together with the tag.
            const["con_gfp_N"] = P + T + L2 + G + H
    else:
        for f in solu_tag:
            const[f"con_N_{f}"] = P + T + L1 + soluTag[f] + H
        if not solu_tag:
            const["con_N"] = P + T + H  # TODO: verify this one
    return const


def run_C(P: str, gfp: bool, solu_tag: list) -> dict:
    """Build the constructs that end with the target sequence ``P``.

    The segment order is the mirror image of ``run_N``: ``H`` comes first and
    ``P`` last.

    Args:
        P: Target amino-acid sequence; it is the last segment of every
            construct returned here.
        gfp: When true, ``G + L2`` is inserted right after the leading ``H``.
        solu_tag: Names of fusion tags (keys of ``soluTag``).  One construct
            is produced per name; an empty list yields a single tag-less
            construct.

    Returns:
        A dict mapping construct name to sequence.  Names are
        ``con_gfp_C_<tag>`` / ``con_gfp_C`` with GFP and ``con_C_<tag>`` /
        ``con_C`` without.

    Raises:
        KeyError: If a name in ``solu_tag`` is not a key of ``soluTag``.
    """
    const = {}
    if gfp:
        for f in solu_tag:
            const[f"con_gfp_C_{f}"] = H + G + L2 + soluTag[f] + L1 + T + P
        if not solu_tag:
            # No tag requested: L1 is dropped together with the tag.
            const["con_gfp_C"] = H + G + L2 + T + P
    else:
        for f in solu_tag:
            const[f"con_C_{f}"] = H + soluTag[f] + L1 + T + P
        if not solu_tag:
            const["con_C"] = H + T + P  # TODO: verify this
    return const


def build(P: str, solu_tag: list, GFP: bool, P_pos: str) -> dict:
    """Compute the construct sequences for Protera's lab.

    The constructs vary along three axes:

    - position of the target sequence (first segment, last segment, or both),
    - with or without GFP,
    - fusion tag used.

    Usage::

        build(amino_sequence, solutags, GFP, P_pos)

    Args:
        P: Target amino-acid sequence.
        solu_tag: Names of the fusion tags to use.  If the string ``"all"`` is
            among them, every tag in ``soluTag`` is used.  An empty list
            produces tag-less constructs.  Available tags:
            'MBP', 'SUMO', 'Fh8', 'GST', 'NusA', 'Thioredoxin', 'FLAG',
            'S-tag', 'CBP', 'STREPII', 'BAP', 'NT11', 'Tab2', 'Z-basic',
            'ProteinA', 'IMPACT', 'mysB', 'PolyR', 'c-myc', 'S', 'SBP-tag',
            'Strep-tag', 'Twin-Strep-tag', 'HAT', 'BCCP', 'HaloTag'.
        GFP: Whether the constructs include the ``G`` sequence.
        P_pos: ``"N"``, ``"C"`` or ``"N+C"``.  The check is a substring test:
            the ``run_N`` constructs are built when the string contains "N"
            and the ``run_C`` constructs when it contains "C".

    Returns:
        A dict mapping construct name to sequence, ``run_N`` entries first.
        The dict is empty when ``P_pos`` contains neither "N" nor "C".

    Raises:
        KeyError: If a requested tag name is not a key of ``soluTag``.
    """
    if "all" in solu_tag:
        solu_tag = list(soluTag)

    # run_C is evaluated before run_N, as in the original call order; the
    # merged result still lists the N constructs first.
    c_constructs = run_C(P, GFP, solu_tag) if "C" in P_pos else {}
    n_constructs = run_N(P, GFP, solu_tag) if "N" in P_pos else {}
    return {**n_constructs, **c_constructs}