File size: 1,662 Bytes
0b11a42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
#%%
import argparse
import os
import logging
from utils import make_output_dir,write_2_log,log_time
import map_2_HBDxBase as map_2_HBDxBase
import annotate_from_mapping as annotate_from_mapping
log = logging.getLogger(__name__)
#%%
# Parse the command line arguments.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--five_prime_adapter',
    type=str,
    default='GTTCAGAGTTCTACAGTCCGACGATC',
)
# NOTE: the FASTA file needs to be stored in the "data" folder.
parser.add_argument(
    '--fasta_file',
    type=str,
    help="Required to provide: --fasta_file sequences_to_be_annotated.fa",
)
args = parser.parse_args()
if not args.fasta_file:
    # Without --fasta_file there is nothing to annotate: show the usage text
    # and stop. parser.exit() terminates via sys.exit() with status 0, exactly
    # like the bare exit() it replaces, but does not depend on the `site`
    # module having injected the interactive exit() helper into builtins
    # (it is missing under `python -S` and in some frozen interpreters).
    parser.print_help()
    parser.exit()
five_prime_adapter = args.five_prime_adapter
sequence_file = args.fasta_file
#%%
@log_time(log)
def main(five_prime_adapter, sequence_file):
    """Executes 'make_anno'.

    1. Maps input sequences to HBDxBase, the human genome, and a collection
       of viral and bacterial genomes.
    2. Extracts information from mapping files.
    3. Generates annotation columns and final annotation dataframe.

    Uses:
        - sequence_file: name of the FASTA file. It is handed to
          make_output_dir() and, prefixed with "../../data/", to
          map_2_HBDxBase.main().
        - five_prime_adapter: forwarded unchanged to
          annotate_from_mapping.main().

    Side effects visible here: switches the process working directory to
    the directory returned by make_output_dir(), makes sure a "log" folder
    exists there, and appends the sequence file name to
    "log/make_anno.log".
    """
    output_dir = make_output_dir(sequence_file)
    # Every relative path below is resolved against the output directory.
    os.chdir(output_dir)

    log_folder = "log"
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # fail if the folder appeared between the check and the creation.
    os.makedirs(log_folder, exist_ok=True)

    log_file = f"{log_folder}/make_anno.log"
    write_2_log(log_file)
    # add name of sequence_file to log file
    with open(log_file, "a") as ofile:
        ofile.write(f"Sequence file: {sequence_file}\n")

    # NOTE(review): the "../../data/" prefix presumes the output directory
    # sits two levels below the folder containing "data" -- confirm against
    # make_output_dir().
    map_2_HBDxBase.main("../../data/" + sequence_file)
    annotate_from_mapping.main(five_prime_adapter)
# Run the pipeline with the adapter and FASTA file name taken from the command line.
main(five_prime_adapter, sequence_file)
# %%
|