# (removed: non-Python extraction artifacts — file-size banner, hash, and a row of line numbers)
import os
from typing import Dict

from anndata import AnnData

from ..processing.seq_tokenizer import SeqTokenizer
from ..utils.file import load
from ..utils.utils import *
def infer_tcga(cfg: Dict = None, path: str = None):
    """Run TCGA inference for a trained model and persist the results.

    Loads the model described by ``cfg``/``path``, reads the inference
    sequences, predicts labels, and writes results (and optionally
    embeddings) under ``inference_output/``.

    Args:
        cfg: Configuration dict; keys used here include ``tensorboard``,
            ``inference_settings['sequences_path']``, ``log_embedds`` and
            ``model_name``.
        path: Path passed through to ``get_model`` to locate the model.

    Returns:
        The predicted labels produced by ``infer_from_pd``.
    """
    if cfg['tensorboard']:
        # Imported lazily so tensorboard is only required when enabled.
        from ..callbacks.tbWriter import writer
    cfg, net = get_model(cfg, path)
    inference_path = cfg['inference_settings']['sequences_path']
    original_infer_df = load(inference_path, index_col=0)
    # AnnData inputs carry the sequence table in .var — TODO confirm with callers.
    if isinstance(original_infer_df, AnnData):
        original_infer_df = original_infer_df.var
    predicted_labels, logits, _, _, all_data, max_len, net, infer_df = \
        infer_from_pd(cfg, net, original_infer_df, SeqTokenizer)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs("inference_output", exist_ok=True)
    if cfg['log_embedds']:
        embedds_pd = log_embedds(cfg, net, all_data['infere_rna_seq'])
        embedds_pd.to_csv(f"inference_output/{cfg['model_name']}_embedds.csv")

    prepare_inference_results_tcga(cfg, predicted_labels, logits, all_data, max_len)
    # If sequences were trimmed, map trimmed sequences back to the originals
    # (row-count mismatch signals that trimming occurred).
    if original_infer_df.shape[0] != infer_df.shape[0]:
        all_data["infere_rna_seq"] = add_original_seqs_to_predictions(
            infer_df, all_data['infere_rna_seq'])
    # Persist final per-sequence predictions.
    all_data["infere_rna_seq"].to_csv(
        f"inference_output/{cfg['model_name']}_inference_results.csv")
    if cfg['tensorboard']:
        writer.close()
    return predicted_labels