Spaces:
Runtime error
Runtime error
File size: 3,458 Bytes
7b86ace 427917d 50ed058 7b86ace 50ed058 7b86ace 427917d 7b86ace 3326af6 50ed058 7b86ace 427917d 7b86ace 030dff0 7b86ace 50ed058 7b86ace d5175dd 7b86ace d5175dd 7b86ace d5175dd 7b86ace d5175dd 7b86ace 28192e4 7b86ace 5d88c86 7b86ace |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
#%%
import pandas as pd
import numpy as np
import torch
from sentence_transformers.util import cos_sim
from sentence_transformers import SentenceTransformer
import gradio as gr
#%%
# Module-level resources shared by every request handled by the app.
# NOTE: an alternative "etalon_prod.csv" source was tried earlier and is disabled.

# Preprocessed complaint records that the recommendations are aggregated from.
df = pd.read_csv("preprocessed_complaints.csv")

# Sentence encoder used to embed the user's free-text query.
model = SentenceTransformer('sentence-transformers/multi-qa-distilbert-cos-v1')

# Distinct complaint texts present in the dataset.
unique_complaints = df['Жалобы'].unique()

# Precomputed embedding matrix the query is compared against.
# NOTE(review): presumably one row per row of `df`, in the same order - confirm.
embeddings = np.load("embeddings.npy")
#%%
def _top_values(series, k):
    """Return the ``k`` most frequent non-null values of ``series``, most frequent first."""
    # ``head`` is always positional, unlike ``[:k]`` which can become
    # label-based when the value_counts index is numeric.
    return series.value_counts().head(k).index.tolist()


def get_recommend(user_input,
                  top_k_spec=3,
                  top_k_services=5,
                  treshold=0.8):
    """Build recommendations for a free-text query from the most similar records.

    The query is embedded with the module-level ``model`` and compared (via
    ``cos_sim``) against the module-level ``embeddings``. Rows of ``df`` whose
    similarity is strictly greater than ``treshold`` are kept, and the most
    frequent values of several columns among those rows are returned.

    NOTE(review): this assumes ``embeddings`` has exactly one row per row of
    ``df``, in the same order - confirm against the script that produced
    ``embeddings.npy``.

    Args:
        user_input: Text to search for.
        top_k_spec: How many values to return for each of the columns
            "Специальность врача" and "Рекомендуемые специалисты".
        top_k_services: How many values to return for complaints, diagnoses
            and examination recommendations.
        treshold: Minimum similarity (exclusive) for a row to be considered.
            The misspelling is kept because it is part of the public signature.

    Returns:
        A dict with these keys, in this order:
            "Специальность врача", "Рекомендуемые специалисты", "Жалобы",
            "Диагноз МКБ": lists of the most frequent values;
            "Рекомендации по обследованию": a list of ``{category: [...]}``
            dicts, one per service category;
            "Рекомендации по обследованию по МКБ": a list of
            ``{diagnosis: [{category: [...]}, ...]}`` dicts, one per returned
            diagnosis.
        All lists are empty when no row passes the threshold.
    """
    # UI sliders may deliver whole numbers as floats; the counts must be ints.
    top_k_spec = int(top_k_spec)
    top_k_services = int(top_k_services)

    specialist_columns = ["Специальность врача",
                          "Рекомендуемые специалисты"]
    categories = ['Инструментальная диагностика', 'Лабораторная диагностика']

    query_embedding = model.encode(user_input)
    # cos_sim returns a (1, N) tensor for a single query; keep the only row.
    scores = cos_sim(query_embedding, embeddings).detach().numpy()[0]
    order = scores.argsort()[::-1]  # positions, most similar first

    # ``order`` holds positions, so select positionally rather than by label.
    ranked = df.iloc[order].copy()
    ranked['cos_sim'] = scores[order]
    relevant = ranked[ranked['cos_sim'] > treshold]

    result = {}
    for col in specialist_columns:
        result[col] = _top_values(relevant[col], top_k_spec)
    result['Жалобы'] = _top_values(relevant['Жалобы'], top_k_services)

    top_k_mkb = _top_values(relevant['Диагноз МКБ'], top_k_services)
    result['Диагноз МКБ'] = top_k_mkb

    # Examination recommendations per diagnosis, split by service category.
    services_by_mkb = []
    for mkb in top_k_mkb:
        mkb_rows = relevant[relevant['Диагноз МКБ'] == mkb]
        per_category = []
        for category in categories:
            category_rows = mkb_rows[mkb_rows['service_name_category'] == category]
            per_category.append({
                category: _top_values(category_rows['Рекомендации по обследованию'],
                                      top_k_services)
            })
        services_by_mkb.append({mkb: per_category})

    # Examination recommendations over all relevant rows, split by category.
    services_overall = []
    for category in categories:
        category_rows = relevant[relevant['service_name_category'] == category]
        services_overall.append({
            category: _top_values(category_rows['Рекомендации по обследованию'],
                                  top_k_services)
        })

    result['Рекомендации по обследованию'] = services_overall
    result['Рекомендации по обследованию по МКБ'] = services_by_mkb
    return result
#%%
# Web UI: one text box plus three sliders, passed positionally to
# get_recommend as (user_input, top_k_spec, top_k_services, treshold).
# The result dict is rendered as JSON. No title is configured, only a description.
gradio_app = gr.Interface(
    fn=get_recommend,
    inputs=[
        'text',
        gr.Slider(label="Топ N специалистов", minimum=1, maximum=10, step=1, value=3),
        gr.Slider(label="Топ N услуг", minimum=1, maximum=10, step=1, value=5),
        gr.Slider(label="Порог релевантности", minimum=0, maximum=1, step=0.05, value=0.8),
    ],
    outputs=[gr.JSON(label='Рекомендации: ')],
    description="Введите услугу:",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
# %%
|