# -*- coding: utf-8 -*-
"""
Created on Mon Jun 15 11:24:45 2020

@author: luol2
"""

import io


def _keep_best(entities, key_func):
    """Keep one entity per key, preferring the strictly higher score.

    Args:
        entities: iterable of entity rows (sequences of strings) whose
            element 4 is a score parseable by ``float``.
        key_func: callable mapping an entity row to a hashable grouping key.

    Returns:
        A list with one entity per distinct key, ordered by the first
        appearance of each key. On a score tie the earlier entity is kept.
    """
    best = {}
    for entity in entities:
        key = key_func(entity)
        kept = best.get(key)
        # Strict ">" so that the first entity seen wins a tie.
        if kept is None or float(entity[4]) > float(kept[4]):
            best[key] = entity
    return list(best.values())


def nest_overlap_entity(nest_list):
    """Resolve a group of overlapping entities down to the best candidates.

    Each entity is a sequence of strings where elements 0 and 1 are the
    start/end offsets, element 3 is the concept id (presumably an HPO id,
    judging by the original variable name) and element 4 is the score.

    Two passes are applied:
      1. entities sharing the same concept id -> retain the higher score;
      2. entities sharing the same start/end span but carrying different
         ids -> retain the higher score.

    Args:
        nest_list: list of entity rows belonging to one overlap group.

    Returns:
        The list of surviving entity rows (the same row objects as passed in).
    """
    # Pass 1: same id, keep the higher-scoring concept.
    by_id = _keep_best(nest_list, lambda ent: ent[3])
    # Pass 2: same index span, different ids.
    # The score must come from the entity under examination; the previous
    # code read it from ``nest_list`` using an index into the filtered list,
    # which compared the wrong entity once pass 1 had dropped anything.
    return _keep_best(by_id, lambda ent: ent[0] + ' ' + ent[1])


def _resolve_group(group):
    """Return the surviving entities of one overlap group."""
    if len(group) == 1:
        return [group[0]]
    return nest_overlap_entity(group)  # find max entity


def _merge_document(rows):
    """Merge the raw entity lines of one document.

    Args:
        rows: tab-separated entity lines, dictionary results first and
            machine-learning results after them.

    Returns:
        A list of entity rows (lists of strings) after sorting by
        (start, end) and resolving every group of overlapping entities.
    """
    # Keyed by the raw line, so byte-identical lines collapse to one entry.
    spans = {}
    for row in rows:
        seg = row.split('\t')
        spans[row] = [int(seg[0]), int(seg[1])]

    # Stable sort on [start, end]; ties keep insertion order (dict first).
    ordered = [row for row, _ in sorted(spans.items(), key=lambda kv: kv[1])]
    if not ordered:
        return []

    merged = []
    group = [ordered[0].split('\t')]
    max_end = int(group[0][1])
    for row in ordered[1:]:
        segs = row.split('\t')
        if int(segs[0]) > max_end:
            # Starts past everything seen so far: close the current group.
            merged.extend(_resolve_group(group))
            group = [segs]
        else:
            group.append(segs)
        max_end = max(max_end, int(segs[1]))
    merged.extend(_resolve_group(group))
    return merged


def combine_ml_dict(dict_tsv, ml_tsv, nest=True):
    """Combine dictionary-based and ML-based tagging results.

    Both inputs are text where documents are separated by a blank line; the
    first line of each document block is a header line and every following
    line is a tab-separated entity whose first two columns are integer
    start/end offsets, with the id in column 3 and the score in column 4.
    Blocks are paired by position.

    Args:
        dict_tsv: dictionary tagging results.
        ml_tsv: machine-learning tagging results.
        nest: accepted for backward compatibility; currently unused.

    Returns:
        The merged results as a string. For every document: the header line
        taken from ``ml_tsv``, the merged entity lines, then a blank line.

    Raises:
        IndexError: if ``ml_tsv`` contains fewer document blocks than
            ``dict_tsv``.
        ValueError: if an entity line's first two columns are not integers.
    """
    with io.StringIO(dict_tsv) as fin_dic, io.StringIO(ml_tsv) as fin_ml:
        all_dic = fin_dic.read().strip().split('\n\n')
        all_ml = fin_ml.read().strip().split('\n\n')

    out = []
    for doc_idx, dic_block in enumerate(all_dic):
        lines_dic = dic_block.split('\n')
        lines_ml = all_ml[doc_idx].split('\n')

        # Line 0 of each block is the header; the rest are entity rows.
        final_entity = _merge_document(lines_dic[1:] + lines_ml[1:])

        out.append(lines_ml[0] + '\n')
        out.extend('\t'.join(ele) + '\n' for ele in final_entity)
        out.append('\n')
    return ''.join(out)