# Residue from the repository web file viewer (not part of the program):
# Artemis-IA's picture | Create app.py | 3daec5b | raw | history blame | 4.42 kB
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import spacy
from wordcloud import WordCloud
from io import StringIO, BytesIO
import mimetypes
from transformers import CamembertForSequenceClassification, CamembertTokenizer
import torch
# Model Loading
@st.cache_resource
def _load_resources():
    """Load every model artifact and return them as one tuple.

    Streamlit re-executes this script from top to bottom on each widget
    interaction. Caching the loader with ``st.cache_resource`` keeps the
    returned objects alive between reruns, so the two joblib files, the
    CamemBERT checkpoint and the spaCy pipeline are read only once instead
    of on every click.

    Returns:
        A ``(model, vectorizer, camembert_model, tokenizer, nlp)`` tuple.
    """
    # NOTE: joblib.load and torch.load both unpickle their input; only load
    # artifact files that ship with the application.
    classifier = joblib.load('model.pkl')
    text_vectorizer = joblib.load('vectorizer.pkl')

    # Start from the base checkpoint with a 2-label head, then overlay the
    # locally saved weights.
    camembert = CamembertForSequenceClassification.from_pretrained("camembert-base", num_labels=2)
    state_dict = torch.load('camembertperso.pth', map_location='cpu')
    # NOTE(review): strict=False tolerates missing/unexpected keys, so a
    # checkpoint that does not match the model loads without any error.
    camembert.load_state_dict(state_dict, strict=False)

    camembert_tokenizer = CamembertTokenizer.from_pretrained('camembert-base', do_lower_case=True)
    french_pipeline = spacy.load("fr_core_news_sm")
    return classifier, text_vectorizer, camembert, camembert_tokenizer, french_pipeline


# Same module-level names as before, so the functions below keep working.
model, vectorizer, camembert_model, tokenizer, nlp = _load_resources()
# Text Processing Functions
def clean_text(text):
    """Return *text* without leading/trailing whitespace, in lower case."""
    trimmed = text.strip()
    return trimmed.lower()
def lemmatize_text(text):
    """Return the lemmas of *text*, joined by single spaces.

    The text is run through the module-level spaCy pipeline ``nlp`` and
    every token (stop words and punctuation included) contributes its
    ``lemma_``.
    """
    return " ".join(token.lemma_ for token in nlp(text))
# Prediction Functions
def predict_label(text):
    """Classify *text* with the joblib-loaded ``model``.

    The text is cleaned, lemmatized and vectorized with the module-level
    ``vectorizer`` before being handed to the classifier.

    Returns:
        A ``(label, probability)`` tuple. ``label`` is the first element of
        ``model.predict``; ``probability`` is the logistic sigmoid of the
        classifier's ``decision_function`` value for the same sample.
    """
    features = vectorizer.transform([lemmatize_text(clean_text(text))])
    label = model.predict(features)[0]
    # Map the decision value onto (0, 1) with a logistic sigmoid.
    margin = model.decision_function(features)[0]
    probability = 1 / (1 + np.exp(-margin))
    return label, probability
def predict_camembert(text):
    """Classify *text* with the module-level CamemBERT model.

    Args:
        text: Raw review text.

    Returns:
        A ``(prediction, probability)`` tuple: the index of the largest
        logit, and the softmax probability assigned to class 1.
    """
    # camembert-base accepts at most 512 tokens. Without truncation a long
    # review overflows the position embeddings and the forward pass raises.
    tokens = tokenizer.encode_plus(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    with torch.no_grad():
        outputs = camembert_model(**tokens)
    # No labels are passed, so the output carries no loss and the logits are
    # always its first element, whether or not extra entries (hidden states,
    # attentions) follow. Index 1 would pick one of those extras.
    logits = outputs[0]
    predictions = torch.argmax(logits, dim=1).item()
    probabilities = torch.softmax(logits, dim=1)[:, 1].item()
    return predictions, probabilities
# App Interface
def _show_prediction(model_name, label, probability):
    """Render the verdict, predicted label and probability for one model."""
    st.write(f'Résultats de {model_name}:')
    if label == 0:
        st.write('La review est négative.')
    else:
        st.write('La review est positive.')
    st.write(f'Score de prédiction ({model_name}) :', f'**{label}**', unsafe_allow_html=True)
    st.write(f'Probabilité ({model_name}) :', f'**{probability:.2%}**', unsafe_allow_html=True)


def _show_wordcloud(text):
    """Render a word cloud built from the non-stop-word lemmas of *text*."""
    doc = nlp(text)
    lemmas = " ".join(token.lemma_ for token in doc if not token.is_stop)
    try:
        cloud = WordCloud(width=800, height=400, background_color='white').generate(lemmas)
    except ValueError:
        # WordCloud raises ValueError when there is no word left to draw
        # (empty text, or text made only of stop words).
        st.info('Aucun mot exploitable pour générer le nuage de mots.')
        return
    st.image(cloud.to_image())


st.title('Analyse de sentiments')
st.write('Cet outil permet de prédire si une review est positive ou négative.')
review_text = st.text_area('Saisir la review ou charger un fichier :')

if st.button('Prédire et générer le nuage de mots'):
    if not review_text.strip():
        # Nothing to classify: do not run the models on an empty string.
        st.warning('Veuillez saisir une review avant de lancer la prédiction.')
    else:
        label_linear_svc, probability_linear_svc = predict_label(review_text)
        _show_prediction('LinearSVC', label_linear_svc, probability_linear_svc)

        label_camembert, probability_camembert = predict_camembert(review_text)
        _show_prediction('Camembert', label_camembert, probability_camembert)

        _show_wordcloud(review_text)

# File upload: a text file is displayed and turned into a word cloud, a CSV
# file is displayed as a table.
uploaded_file = st.file_uploader("Charger un fichier texte", type=["txt", "csv"])
if uploaded_file is not None:
    # Decide on the file extension itself: the MIME type guessed from a name
    # depends on the host's type registry and is not always 'text/csv'.
    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
    if extension == "txt":
        try:
            file_contents = uploaded_file.read().decode("utf-8")
        except UnicodeDecodeError:
            st.error("Le fichier n'est pas encodé en UTF-8.")
        else:
            st.text(file_contents)
            _show_wordcloud(file_contents)
    elif extension == "csv":
        try:
            df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as err:
            st.error(f'Impossible de lire le fichier CSV : {err}')
        else:
            st.write(df)