#%%
import pandas as pd
import numpy as np
import torch
from sentence_transformers.util import cos_sim
from sentence_transformers import SentenceTransformer
import gradio as gr

#%%
# etalon = pd.read_csv("etalon_prod.csv")
# Preprocessed complaints table; the rows retrieved for a query supply the
# specialists, diagnoses and examination recommendations shown to the user.
df = pd.read_csv("preprocessed_complaints.csv")
model = SentenceTransformer('sentence-transformers/multi-qa-distilbert-cos-v1')
unique_complaints = df['Жалобы'].unique()

# Precomputed embedding matrix that user queries are compared against.
# NOTE(review): get_recommend assumes row i of `embeddings` corresponds to
# row i of `df` -- TODO confirm against the script that produced the file.
with open("embeddings.npy", 'rb') as f:
    embeddings = np.load(f)


#%%
def _top_values(series, k):
    """Return the ``k`` most frequent values of ``series``, most frequent first."""
    return series.value_counts().head(k).index.tolist()


def _top_services_by_category(frame, categories, k):
    """Return one ``{category: top-k recommended examinations}`` dict per category."""
    return [
        {
            category: _top_values(
                frame.loc[
                    frame['service_name_category'] == category,
                    'Рекомендации по обследованию',
                ],
                k,
            )
        }
        for category in categories
    ]


def get_recommend(user_input, top_k_spec=3, top_k_services=5, treshold=0.8):
    """Build recommendations for a free-text query from similar known rows.

    The query is embedded with ``model`` and compared by cosine similarity
    with every row of ``embeddings``. Rows of ``df`` whose similarity is
    strictly greater than ``treshold`` are kept, and the most frequent values
    of several columns among them are reported.

    Args:
        user_input: Query text to embed.
        top_k_spec: How many values to return for each specialist column.
        top_k_services: How many complaints, diagnoses and examinations to
            return (examinations: per category).
        treshold: Minimum cosine similarity (exclusive) for a row to count.

    Returns:
        A dict with the keys ``"Специальность врача"``,
        ``"Рекомендуемые специалисты"``, ``"Жалобы"`` and ``"Диагноз МКБ"``
        (lists of values), ``"Рекомендации по обследованию"`` (a list of
        ``{category: [examinations]}`` dicts) and
        ``"Рекомендации по обследованию по МКБ"`` (a list of
        ``{diagnosis: [{category: [examinations]}, ...]}`` dicts). All lists
        are empty when no row passes the threshold.
    """
    # UI widgets may hand over whole numbers as floats; the counts are used
    # as slice bounds, so make sure they are real integers.
    top_k_spec = int(top_k_spec)
    top_k_services = int(top_k_services)

    specialist_columns = ["Специальность врача", "Рекомендуемые специалисты"]
    categories = ['Инструментальная диагностика', 'Лабораторная диагностика']

    query_embedding = model.encode(user_input)
    # cos_sim returns a 2-D tensor; only the first row is used.
    scores = cos_sim(query_embedding, embeddings).detach().numpy()[0]

    # Most similar first. The same permutation orders both the rows and their
    # scores, so no second sort is needed.
    order = scores.argsort()[::-1]
    # argsort yields positions, not index labels, hence iloc rather than loc.
    ranked = df.iloc[order].copy()
    ranked['cos_sim'] = scores[order]
    ranked = ranked[ranked['cos_sim'] > treshold]

    result = {
        column: _top_values(ranked[column], top_k_spec)
        for column in specialist_columns
    }
    result['Жалобы'] = _top_values(ranked['Жалобы'], top_k_services)

    top_k_mkb = _top_values(ranked['Диагноз МКБ'], top_k_services)
    result['Диагноз МКБ'] = top_k_mkb

    # Examination recommendations over all retained rows ...
    result['Рекомендации по обследованию'] = _top_services_by_category(
        ranked, categories, top_k_services
    )
    # ... and separately for each of the most frequent diagnoses.
    result['Рекомендации по обследованию по МКБ'] = [
        {
            mkb: _top_services_by_category(
                ranked[ranked['Диагноз МКБ'] == mkb], categories, top_k_services
            )
        }
        for mkb in top_k_mkb
    ]
    return result


#%%
# Web UI: a text box for the query plus sliders that map positionally onto
# get_recommend's top_k_spec, top_k_services and treshold parameters.
gradio_app = gr.Interface(
    get_recommend,
    inputs=[
        'text',
        gr.Slider(minimum=1, maximum=10, step=1, label="Топ N специалистов", value=3),
        gr.Slider(minimum=1, maximum=10, step=1, label="Топ N услуг", value=5),
        gr.Slider(minimum=0, maximum=1, step=0.05, label="Порог релевантности", value=0.8),
    ],
    outputs=[gr.JSON(label='Рекомендации: ')],
    # title="Предсказание топ-10 наиболее схожих услуг",
    description="Введите услугу:"
)

if __name__ == "__main__":
    gradio_app.launch()

# %%