Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from catboost import CatBoostClassifier | |
import re | |
import string | |
from nltk.corpus import stopwords | |
from pymystem3 import Mystem | |
from joblib import load | |
import nltk | |
# Fetch the NLTK stop-word corpus only when it is not already installed, so
# start-up does not need to contact the NLTK index once the corpus is present.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
def data_preprocessing(text):
    """Normalize a raw review string before lemmatization.

    The text is lower-cased, then stripped of ``<...>`` tags, ``http`` links,
    ``@mentions``, ``#hashtags`` and digit runs, then of every character in
    ``string.punctuation``, and finally of Russian NLTK stop words.

    Args:
        text: Raw review text.

    Returns:
        The remaining words joined by single spaces.
    """
    russian_stop_words = set(stopwords.words('russian'))

    # (pattern, replacement) pairs, applied in this exact order. Tags are
    # deleted outright; everything else is replaced by a space so that the
    # neighbouring words stay separated.
    substitutions = (
        ("<.*?>", ""),
        (r'http\S+', " "),
        (r'@\w+', ' '),
        (r'#\w+', ' '),
        (r'\d+', ' '),
    )

    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Punctuation is deleted (not replaced by a space); string.punctuation
    # covers ASCII punctuation only.
    cleaned = cleaned.translate(str.maketrans("", "", string.punctuation))

    kept_words = [
        token for token in cleaned.split() if token not in russian_stop_words
    ]
    return " ".join(kept_words)
def lemmatize_text(text):
    """Lemmatize *text* with Mystem and return the tokens joined by spaces.

    Args:
        text: Text to lemmatize (here, the output of ``data_preprocessing``).

    Returns:
        Every item returned by ``Mystem.lemmatize`` joined with a single
        space character.
    """
    # NOTE(review): a fresh Mystem instance is created on every call;
    # consider reusing one if lemmatization latency becomes a problem.
    analyzer = Mystem()
    return ' '.join(analyzer.lemmatize(text))
# Artifacts used by the page: a CatBoost classifier and the TF-IDF vectorizer
# whose output it is given. Both files are resolved relative to the current
# working directory.
_MODEL_PATH = 'cat_model4.cbm'
_VECTORIZER_PATH = 'tfidf_vectorizer.joblib'

model = CatBoostClassifier()
model.load_model(_MODEL_PATH)

tfidf_vectorizer = load(_VECTORIZER_PATH)
def classic_ml_page():
    """Render the Streamlit page that classifies a medical-institution review.

    Draws a title, a text area and a button. When the button is pressed, the
    entered text is cleaned with ``data_preprocessing``, lemmatized with
    ``lemmatize_text``, vectorized with the module-level ``tfidf_vectorizer``
    and passed to the module-level ``model``. A predicted label of ``1`` is
    reported as a positive review; any other label as a negative one.

    Empty or whitespace-only input is not classified; the user is asked to
    enter a review instead.
    """
    st.title("Классификация отзывов о медицинских учреждениях")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    # Nothing to do until the user presses the button.
    if not st.button("Классифицировать"):
        return

    # Treat whitespace-only input as empty: otherwise the model would be asked
    # to classify an empty document and a verdict would be shown for it.
    if not user_review or not user_review.strip():
        st.write("Пожалуйста, введите отзыв для классификации.")
        return

    preprocessed_review = data_preprocessing(user_review)
    lemmatized_review = lemmatize_text(preprocessed_review)
    # The vectorizer expects an iterable of documents, hence the 1-item list.
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])
    prediction = model.predict(vectorized_review)

    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")