# Source: Nlp_proj/Models/strim_nlp.py — uploaded by Veronika1101 in commit d15a7ed ("Upload 20 files", verified).
import re
import string
import threading
import time

import catboost
import nltk
import pandas as pd
import streamlit as st
from catboost import CatBoostClassifier
from joblib import load
from nltk.corpus import stopwords
from pymystem3 import Mystem

nltk.download('stopwords')
def data_preprocessing(text):
    """Normalize raw review text before lemmatization.

    The text is lower-cased; HTML-like tags are removed; URLs, @mentions,
    #hashtags and digit runs are each replaced with a space; ASCII
    punctuation characters are dropped; finally the Russian NLTK stop words
    are filtered out.

    Args:
        text: Raw input string.

    Returns:
        The surviving words joined by single spaces.
    """
    russian_stop_words = set(stopwords.words('russian'))

    # (pattern, replacement) pairs; they are applied in exactly this order.
    substitutions = (
        ("<.*?>", ""),
        (r'http\S+', " "),
        (r'@\w+', ' '),
        (r'#\w+', ' '),
        (r'\d+', ' '),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    # Punctuation is deleted outright (not replaced by a space).
    punctuation_chars = set(string.punctuation)
    cleaned = "".join(ch for ch in cleaned if ch not in punctuation_chars)

    kept_words = (
        token for token in cleaned.split() if token not in russian_stop_words
    )
    return " ".join(kept_words)
# Shared Mystem instance, created on first use. Building a Mystem object per
# call means the external mystem process has to be started again for every
# review, which dominates the cost of lemmatizing short texts.
_MYSTEM = None
# Streamlit may serve sessions from different threads; the lock keeps two
# requests from talking to the single mystem process at the same time.
_MYSTEM_LOCK = threading.Lock()


def lemmatize_text(text):
    """Lemmatize *text* with Mystem and return the lemmas as one string.

    Args:
        text: Text to lemmatize.

    Returns:
        All items returned by ``Mystem.lemmatize`` joined with a single
        space, exactly as they come back from the analyzer.

    Raises:
        Whatever ``Mystem()`` or ``Mystem.lemmatize`` raises. After a failure
        the shared instance is dropped so the next call starts a fresh one.
    """
    global _MYSTEM
    with _MYSTEM_LOCK:
        if _MYSTEM is None:
            _MYSTEM = Mystem()
        try:
            lemmas = _MYSTEM.lemmatize(text)
        except Exception:
            # Do not keep a possibly broken analyzer around; rebuild next time.
            _MYSTEM = None
            raise
    return ' '.join(lemmas)
# Module-level artifacts, loaded once when this module is imported.
# Both paths are relative, so they are resolved against the process's
# current working directory.

# Create an empty CatBoost classifier and populate it from the saved model file.
model = CatBoostClassifier()
model.load_model('Weights/cat_model4.cbm')
# Object whose .transform() turns text into model input in classic_ml_page
# (presumably the TF-IDF vectorizer fitted at training time — confirm).
# NOTE(review): joblib.load unpickles the file — only load trusted artifacts.
tfidf_vectorizer = load('Weights/tfidf_vectorizer.joblib')
def classic_ml_page():
    """Render the Streamlit page that classifies a user-entered review.

    Shows a title, a text area and a button. When the button is pressed with
    non-blank text, the text is passed through ``data_preprocessing`` and
    ``lemmatize_text``, vectorized with the module-level ``tfidf_vectorizer``
    and classified by the module-level ``model``. The verdict and the time
    spent inside ``model.predict`` are written to the page. Empty or
    whitespace-only input produces a prompt asking for a review instead.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title("Классификация отзывов")
    user_review = st.text_area("Введите ваш отзыв здесь:")

    if not st.button("Классифицировать"):
        return

    # Whitespace-only input contains nothing to classify; treat it like an
    # empty field instead of sending an empty document to the model.
    if not (user_review and user_review.strip()):
        st.write("Пожалуйста, введите отзыв для классификации.")
        return

    preprocessed_review = data_preprocessing(user_review)
    lemmatized_review = lemmatize_text(preprocessed_review)
    vectorized_review = tfidf_vectorizer.transform([lemmatized_review])

    # perf_counter is monotonic and high-resolution, so the measured interval
    # cannot be distorted by system clock adjustments. Only the predict call
    # is timed, not the preprocessing.
    start_time = time.perf_counter()
    prediction = model.predict(vectorized_review)
    execution_time = time.perf_counter() - start_time

    # A predicted label of 1 is reported as positive, anything else as negative.
    if prediction[0] == 1:
        st.write("Позитивный отзыв 😀")
    else:
        st.write("Негативный отзыв 😟")
    st.write(f'Время предсказания: {execution_time:.4f} секунд')