# Page header captured together with the source (not program code):
# Spaces: Runtime error
# Runtime error
#%% | |
import pandas as pd | |
import numpy as np | |
import torch | |
import json | |
import re | |
from sentence_transformers.util import cos_sim | |
from sentence_transformers import SentenceTransformer | |
from sklearn.feature_extraction.text import CountVectorizer | |
import gradio as gr | |
import nltk | |
nltk.download('stopwords') | |
from nltk.corpus import stopwords | |
#%% | |
# Sentence-embedding model used to encode the n-grams extracted from the input.
model = SentenceTransformer('sentence-transformers/multi-qa-distilbert-cos-v1')

# NLTK's Russian stop-word list extended with one extra upper-case token.
russian_stopwords = stopwords.words('russian') + ['ВАШ']

# Symptom labels, looked up by position in get_symptomps_v2.
# The encoding is given explicitly so that decoding the (presumably Cyrillic)
# labels does not depend on the platform's default locale.
with open("top_150_symps_by_spec.json", 'r', encoding='utf-8') as f:
    symps = json.load(f)

# Precomputed embeddings the input n-grams are compared against.
# presumably row i is the embedding of symps[i] — TODO confirm
with open("embeddings.npy", 'rb') as f:
    embs = np.load(f)
def remove_numbers(text):
    """Strip digits and punctuation from *text*.

    Despite the name, two things are removed: every run of digits, and then
    every character that is neither a word character nor whitespace.
    Leading and trailing whitespace is trimmed afterwards; whitespace inside
    the string is left as is, so removed tokens may leave double spaces.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string (possibly empty).
    """
    without_digits = re.sub(r'\d+', '', text)
    without_punctuation = re.sub(r'[^\w\s]', '', without_digits)
    return without_punctuation.strip()
# N-gram extractor (unigrams up to trigrams) that drops the Russian stop words
# and cleans each document with remove_numbers before tokenizing.
# NOTE(review): per the scikit-learn docs a custom `preprocessor` replaces the
# built-in preprocessing stage, lowercasing included — confirm that
# case-sensitive stop-word matching is what is wanted here.
vectorizer = CountVectorizer(
    ngram_range=(1, 3),
    stop_words=russian_stopwords,
    preprocessor=remove_numbers,
)
def get_symptomps_v2(text, treshold = 0.7):
    """Match free text against the known symptom list.

    The input is broken into n-grams by the module-level ``vectorizer``; each
    n-gram is embedded with ``model`` and compared, by cosine similarity, with
    the precomputed ``embs``. For every n-gram the best-scoring column is kept
    if its similarity is above ``treshold`` (and positive), and the ``symps``
    entry at that index is collected.

    Args:
        text: A single string or a sequence of strings.
        treshold: Minimum cosine similarity for an n-gram to count as a match.

    Returns:
        A sorted, de-duplicated list of matched symptoms, or
        ``['Симптомы не определены']`` when nothing matched or processing
        failed (best-effort: errors are logged, never raised to the caller).
    """
    import logging  # local import keeps this block self-contained

    fallback = ['Симптомы не определены']
    try:
        documents = [text] if isinstance(text, str) else text
        # The shared vectorizer is refitted on every call, so its vocabulary
        # reflects only the current input.
        vectorizer.fit(documents)
        ngrams = vectorizer.get_feature_names_out()
        text_emb = model.encode(ngrams, batch_size=64)
        cos_sim_m = cos_sim(text_emb, embs).numpy()
        # Scores at or below the threshold are replaced by -1 so the
        # positivity check below rejects rows without a real match.
        cos_sim_m = np.where(cos_sim_m > treshold, cos_sim_m, -1)
        arg_max_idx = np.argmax(cos_sim_m, axis=1)
        outputs = [
            symps[idx]
            for idx, cos_sim_row in zip(arg_max_idx, cos_sim_m)
            if cos_sim_row[idx] > 0
        ]
        if not outputs:
            return fallback
        return np.unique(outputs).tolist()
    except Exception:
        # Deliberately best-effort (e.g. input made only of stop words leaves
        # the vectorizer with nothing to fit on), but record the cause and let
        # KeyboardInterrupt / SystemExit propagate.
        logging.getLogger(__name__).exception("symptom extraction failed")
        return fallback
#%% | |
# Web UI around get_symptomps_v2: a text box for the input and a slider for
# the similarity threshold; the result list is rendered as JSON.
# The slider starts at 0.8, whereas get_symptomps_v2's own default is 0.7.
gradio_app = gr.Interface(
    get_symptomps_v2,
    inputs=[
        'text',
        gr.Slider(minimum=0, maximum=1, step=0.05, label="Порог релевантности", value=0.8),
    ],
    outputs=[gr.JSON(label='Симптомы: ')],
    description="Введите услугу:",
)

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    gradio_app.launch()
# %% | |