File size: 1,662 Bytes
0b11a42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

#%%
import argparse
import os
import logging

from utils import make_output_dir,write_2_log,log_time
import map_2_HBDxBase as map_2_HBDxBase
import annotate_from_mapping as annotate_from_mapping


log = logging.getLogger(__name__)



#%%
# get command line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--five_prime_adapter', type=str, default='GTTCAGAGTTCTACAGTCCGACGATC')
parser.add_argument('--fasta_file', type=str, help="Required to provide: --fasta_file sequences_to_be_annotated.fa") # NOTE: needs to be stored in "data" folder
args = parser.parse_args()
if not args.fasta_file:
    parser.print_help()
    exit()
five_prime_adapter = args.five_prime_adapter
sequence_file = args.fasta_file

#%%
@log_time(log)
def main(five_prime_adapter, sequence_file):
    """Executes 'make_anno'. 
    1. Maps input sequences to HBDxBase, the human genome, and a collection of viral and bacterial genomes.
    2. Extracts information from mapping files.
    3. Generates annotation columns and final annotation dataframe.

    Uses:

    - sequence_file
    - five_prime_adapter

    """
    output_dir = make_output_dir(sequence_file)
    os.chdir(output_dir)

    log_folder = "log"
    if not os.path.exists(log_folder):
        os.makedirs(log_folder)
    write_2_log(f"{log_folder}/make_anno.log")
    
    # add name of sequence_file to log file
    with open(f"{log_folder}/make_anno.log", "a") as ofile:
        ofile.write(f"Sequence file: {sequence_file}\n")
                    
    map_2_HBDxBase.main("../../data/" + sequence_file)
    annotate_from_mapping.main(five_prime_adapter)
   

main(five_prime_adapter, sequence_file)
# %%