Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import matplotlib.pyplot as plt | |
| import time | |
| import os | |
| import os | |
# Use a custom cache directory for Hugging Face models
HF_CACHE_DIR = "./hf_cache"
# NOTE: `transformers` has already been imported at this point, so its default
# cache location was resolved before this assignment. The variable is kept for
# anything that reads it later, but the directory is also passed explicitly
# through `cache_dir` below so the custom location is actually used.
os.environ["HF_HOME"] = HF_CACHE_DIR
# Ensure the cache directory exists
os.makedirs(HF_CACHE_DIR, exist_ok=True)

model_name = "tabularisai/multilingual-sentiment-analysis"


@st.cache_resource
def _load_model(name, cache_dir):
    """Load the tokenizer and sequence classifier for *name*.

    Streamlit re-executes the whole script on every widget interaction; the
    resource cache keeps the loaded objects alive so they are built once per
    server process instead of once per rerun.

    Args:
        name: Hugging Face model identifier.
        cache_dir: Directory where downloaded files are stored.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name, cache_dir=cache_dir)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name, cache_dir=cache_dir)
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_model(model_name, HF_CACHE_DIR)
# Function for sentiment prediction with progress bar
def predict_sentiment(texts):
    """Classify each text into one of five sentiment labels.

    Every entry of *texts* is tokenized (truncated to 512 tokens) and passed
    through the module-level ``model`` one at a time. A Streamlit progress bar
    is advanced after each prediction.

    Args:
        texts: Sized sequence of strings to classify.

    Returns:
        A list with one label per input text, in input order. Labels are
        "Very Negative", "Negative", "Neutral", "Positive" or "Very Positive".
    """
    sentiment_map = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}
    sentiments = []
    progress_bar = st.progress(0)
    total_texts = len(texts)
    for i, text in enumerate(texts):
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model(**inputs)
        # Softmax is monotonic, so the argmax of the raw logits selects the
        # same class as the argmax of the probabilities.
        label_id = torch.argmax(outputs.logits, dim=-1).item()
        sentiments.append(sentiment_map[label_id])
        # Update progress bar (no artificial delay: the model call itself
        # already paces the updates).
        progress_bar.progress((i + 1) / total_texts)
    return sentiments
# Streamlit UI
st.title("Sentiment Analysis App")
st.write("Upload a CSV or Excel file containing text data for sentiment analysis.")

# File upload
uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"], accept_multiple_files=False)

if uploaded_file is not None:
    try:
        # Read file. The extension is compared case-insensitively so that a
        # name such as "DATA.CSV" is not handed to the Excel reader.
        if uploaded_file.name.lower().endswith(".csv"):
            df = pd.read_csv(uploaded_file)
        else:
            df = pd.read_excel(uploaded_file)
    except Exception as e:
        st.error(f"Error reading file: {e}")
    else:
        # Everything below needs a successfully parsed DataFrame; keeping it
        # in the `else` branch guarantees `df` and `text_column` are bound
        # before the button handler uses them.
        st.write("Dataset Preview:")
        st.dataframe(df.head())

        if df.empty:
            # No rows (or no columns) means there is nothing to classify or plot.
            st.warning("The uploaded file contains no rows to analyze.")
        else:
            # Select text column
            text_column = st.selectbox("Select the text column for analysis", df.columns)

            if st.button("Analyze Sentiment"):
                # Get text data
                texts = df[text_column].astype(str).tolist()

                # Predict sentiments with progress bar
                sentiments = predict_sentiment(texts)
                df["Sentiment"] = sentiments

                # Display results
                st.write("Sentiment Analysis Results:")
                st.dataframe(df[[text_column, "Sentiment"]])

                # Pie chart of sentiment distribution
                st.write("Sentiment Distribution:")
                sentiment_counts = df["Sentiment"].value_counts()
                fig, ax = plt.subplots()
                ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct="%1.1f%%", startangle=90)
                ax.axis("equal")
                st.pyplot(fig)
                # pyplot keeps every figure it creates alive until it is
                # closed; release it so reruns do not accumulate figures.
                plt.close(fig)