"""Streamlit page that classifies a Russian-language review as positive or negative.

Pipeline: text cleaning -> Mystem lemmatization -> TF-IDF vectorization ->
CatBoost prediction. The model and vectorizer are loaded once at import time
from the ``Weights`` directory.
"""
from functools import lru_cache
import re
import string
import threading
import time

import catboost
from catboost import CatBoostClassifier
from joblib import load
import nltk
from nltk.corpus import stopwords
import pandas as pd
from pymystem3 import Mystem
import streamlit as st

# Make sure the NLTK stop-word corpus is available before it is first read.
nltk.download('stopwords')

# Patterns are compiled once; they are applied in this exact order by
# data_preprocessing().
_HTML_TAG_RE = re.compile("<.*?>")
_URL_RE = re.compile(r'http\S+')
_MENTION_RE = re.compile(r'@\w+')
_HASHTAG_RE = re.compile(r'#\w+')
_DIGITS_RE = re.compile(r'\d+')

# Deletes every ASCII punctuation character in a single C-level pass.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# The Mystem instance is shared between calls, so access is serialized.
_MYSTEM_LOCK = threading.Lock()


@lru_cache(maxsize=1)
def _russian_stop_words() -> frozenset:
    """Return the Russian NLTK stop words, read from the corpus only once."""
    return frozenset(stopwords.words('russian'))


@lru_cache(maxsize=1)
def _get_mystem() -> Mystem:
    """Return a single shared Mystem instance instead of creating one per call."""
    return Mystem()


def data_preprocessing(text: str) -> str:
    """Clean raw review text before lemmatization.

    Steps, in order: lowercase; drop HTML tags; replace URLs, @mentions,
    #hashtags and digit runs with a space; delete ASCII punctuation;
    remove Russian stop words.

    Args:
        text: Raw review text.

    Returns:
        The remaining words joined by single spaces (possibly an empty string).
    """
    text = text.lower()
    text = _HTML_TAG_RE.sub("", text)
    text = _URL_RE.sub(" ", text)
    text = _MENTION_RE.sub(' ', text)
    text = _HASHTAG_RE.sub(' ', text)
    text = _DIGITS_RE.sub(' ', text)
    # Punctuation is deleted (not replaced by a space), as in the original
    # pipeline, so the resulting tokens stay the same.
    text = text.translate(_PUNCTUATION_TABLE)
    stop_words = _russian_stop_words()
    return " ".join(word for word in text.split() if word not in stop_words)


def lemmatize_text(text: str) -> str:
    """Lemmatize ``text`` with Mystem.

    Args:
        text: Text to lemmatize (here, the output of data_preprocessing).

    Returns:
        All items returned by ``Mystem.lemmatize`` joined with a single space.
    """
    mystem = _get_mystem()
    with _MYSTEM_LOCK:
        lemmas = mystem.lemmatize(text)
    return ' '.join(lemmas)


# Model artifacts are loaded once at import time.
model = CatBoostClassifier()
model.load_model('Weights/cat_model4.cbm')
# NOTE: joblib files are pickle-based; only load vectorizer files from a
# trusted source.
tfidf_vectorizer = load('Weights/tfidf_vectorizer.joblib')


def classic_ml_page():
    """Render the review classification page.

    Shows a text area and a button. When the button is pressed with a
    non-blank review, the review is preprocessed, lemmatized, vectorized and
    classified; the predicted sentiment and the time spent in
    ``model.predict`` are written to the page. A blank review produces a
    prompt to enter text instead.
    """
    st.title("Классификация отзывов")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    if not st.button("Классифицировать"):
        return

    # Treat whitespace-only input the same as empty input.
    if not (user_review or "").strip():
        st.write("Пожалуйста, введите отзыв для классификации.")
        return

    preprocessed_review = data_preprocessing(user_review)
    lemmatized_review = lemmatize_text(preprocessed_review)
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])

    # perf_counter is monotonic and high-resolution, which suits measuring a
    # short interval better than wall-clock time. Only the predict call is timed.
    start_time = time.perf_counter()
    prediction = model.predict(vectorized_review)
    execution_time = time.perf_counter() - start_time

    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")
    st.write(f'Время предсказания: {execution_time:.4f} секунд')