File size: 1,524 Bytes
0b11a42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38

from typing import Dict

from anndata import AnnData

from ..processing.seq_tokenizer import SeqTokenizer
from ..utils.file import load
from ..utils.utils import *


def infer_tcga(cfg: Dict = None, path: str = None):
    """Run inference on the configured sequences and write the results to disk.

    The model is obtained from ``get_model(cfg, path)``, which also returns
    the configuration used for the rest of the run. Sequences are loaded from
    ``cfg['inference_settings']['sequences_path']``; when the loaded object is
    an ``AnnData``, its ``.var`` table is used as the input frame.

    Outputs are written under ``inference_output/`` (created if missing):

    * ``{model_name}_embedds.csv`` -- only when ``cfg['log_embedds']`` is truthy.
    * ``{model_name}_inference_results.csv`` -- always.

    Args:
        cfg: Configuration mapping. Keys read here are ``'tensorboard'``,
            ``'inference_settings' -> 'sequences_path'``, ``'log_embedds'``
            and ``'model_name'``.
        path: Forwarded unchanged to ``get_model``.

    Returns:
        The predicted labels, i.e. the first element returned by
        ``infer_from_pd``.
    """
    output_dir = "inference_output"

    # The tensorboard writer is imported lazily so its module is only loaded
    # when tensorboard logging is requested. Remember whether it was imported
    # so the cleanup below does not depend on the (possibly replaced) cfg.
    writer = None
    if cfg['tensorboard']:
        from ..callbacks.tbWriter import writer

    try:
        cfg, net = get_model(cfg, path)
        inference_path = cfg['inference_settings']['sequences_path']
        original_infer_df = load(inference_path, index_col=0)
        if isinstance(original_infer_df, AnnData):
            original_infer_df = original_infer_df.var

        predicted_labels, logits, _, _, all_data, max_len, net, infer_df = infer_from_pd(
            cfg, net, original_infer_df, SeqTokenizer
        )

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

        if cfg['log_embedds']:
            embedds_pd = log_embedds(cfg, net, all_data['infere_rna_seq'])
            embedds_pd.to_csv(f"{output_dir}/{cfg['model_name']}_embedds.csv")

        prepare_inference_results_tcga(cfg, predicted_labels, logits, all_data, max_len)

        # A row-count mismatch means the inference frame no longer lines up
        # with the input (sequences were trimmed), so attach the mapping from
        # trimmed sequences back to the original ones.
        if original_infer_df.shape[0] != infer_df.shape[0]:
            all_data["infere_rna_seq"] = add_original_seqs_to_predictions(
                infer_df, all_data['infere_rna_seq']
            )

        all_data["infere_rna_seq"].to_csv(
            f"{output_dir}/{cfg['model_name']}_inference_results.csv"
        )
        return predicted_labels
    finally:
        # Close the writer even when inference or saving fails, so it is not
        # left open on the error path.
        if writer is not None:
            writer.close()