File size: 1,556 Bytes
c2a02c6
1
'''
This code merges SwissModel model summary files (metadata) into one file to be used in feature vector creation.
Please run this code in the folder wherein downloaded .tar files are downloaded.
Merged file will be found under extract_swissmodel_structures folder that will be created when this code is run.
'''


import tarfile, glob, os, shutil
import argparse

parser = argparse.ArgumentParser(description='ASCARIS')

parser.add_argument('-folder_name', '--folder_name',
                    help='Enter the directory where meta-data is found.',
                    default=1)

args = parser.parse_args()

meta_data = args.folder_name
def swissmodel_file():
    os.makedirs('input_files/extract_swissmodel_structures/', exist_ok=True)

    all_swissmodel = open('input_files/swissmodel_structures.txt', 'w')
    all_swissmodel.write('UniProtKB_ac	iso_id	uniprot_seq_length	uniprot_seq_md5	coordinate_id	provider	from	to	template	qmeandisco_global	seqid	url')
    all_swissmodel.write('\n')
    for f in glob.glob(f'{meta_data}/*.tar.gz'):
        name = f.split('/')[-1].split('.')[0]
        with tarfile.open(f) as tar:
            tar.extractall(f'input_files/extract_swissmodel_structures/{name}')
            with open(f'input_files/extract_swissmodel_structures/{name}/SWISS-MODEL_Repository/INDEX') as x:
                lines = (x.readlines())[7:]
                for line in lines:
                    all_swissmodel.write(line)
    shutil.rmtree('input_files/extract_swissmodel_structures/')


if __name__ == '__main__':
    swissmodel_file()