import random
import warnings
import pandas as pd
from PIL import Image
import streamlit as st
from wordcloud import WordCloud
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from utils.utils import get_top_ngram
NLTK_DATA = os.getenv("NLTK_PATH")
if not NLTK_DATA:
NLTK_DATA = "/home/user/code/nltk_data"
nltk.data.path.append(NLTK_DATA)
warnings.filterwarnings("ignore")
try:
    nltk.data.find("corpora/stopwords.zip")
except LookupError:
    nltk.download("stopwords", download_dir=NLTK_DATA)
try:
nltk.data.find("tokenizers/punkt.zip")
except LookupError:
nltk.download("punkt", download_dir=NLTK_DATA)
@st.cache_data
def load_data(df):
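    """Parse the `createdAt` timestamps and add a `date` column (YYYY-MM-DD) used for grouping by day."""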
# df = pd.read_csv(file_path, dtype={'text': 'string', 'sentiment_label': 'category'})
df["createdAt"] = pd.to_datetime(df["createdAt"])
df["date"] = df["createdAt"].dt.strftime("%Y-%m-%d")
return df
@st.cache_data
def process_texts(texts):
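    """Tokenize each text, lowercase it, drop Spanish and custom stopwords, and re-join the remaining tokens."""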
    # Domain-specific Spanish stopwords (filler verbs, slang) added on top of NLTK's list.
    custom_stopwords = {
        'ser', 'haber', 'hacer', 'tener', 'poder', 'ir', 'q', 'si', 'solo', 'saber', 'decir',
        'dar', 'querer', 'ver', 'así', 'sos', 'maje', 'dejar', 'op', 'vos',
        'cada', 'mismo', 'usted', 'mas', 'pues', 'andar', 'ahora', 'claro', 'nunca', 'quedar', 'pasar',
        'venir', 'poner', 'dio', 'señora', 'señor', 'ahí', 'asi', 'vez', 'jajaja',
    }
stop_words = set(stopwords.words("spanish"))
stop_words.update(custom_stopwords)
tokenized_texts = texts.apply(word_tokenize)
tokenized_texts = tokenized_texts.apply(
lambda x: [word.lower() for word in x if word.lower() not in stop_words]
)
texts_cleaned = tokenized_texts.apply(lambda x: " ".join(x))
return texts_cleaned
def custom_color_func(word, font_size, position, orientation, font_path, random_state):
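    """Return a random color from the app's palette; passed to WordCloud as its color_func."""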
color_palette = ["#ff2b2b", "#83c9ff", "#0068c9"]
return random.choice(color_palette)
def display_word_cloud(dataframe):
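    """Build a word cloud from the `text` column and embed it as an image in a Plotly figure."""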
all_text = " ".join(dataframe["text"])
wordcloud = WordCloud(
background_color="#fff", colormap="autumn", color_func=custom_color_func
).generate(all_text)
wordcloud_image = wordcloud.to_array()
fig = go.Figure()
fig.add_layout_image(
dict(
source=Image.fromarray(wordcloud_image),
x=0,
y=1,
sizex=1,
sizey=1.3,
opacity=1,
)
)
fig.update_layout(
autosize=False,
height=170,
width=500,
margin=dict(l=0, r=0, t=0, b=0),
xaxis=dict(visible=False),
yaxis=dict(visible=False),
)
return fig
def most_common_trigrams(df, pdf=False):
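    """Plot the most frequent bigrams (n=2, despite the function name) per sentiment class as three horizontal bar subplots."""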
stop_words = set(stopwords.words("spanish")) # noqa: F841
colors = ["#ff2b2b", "#83c9ff", "#0068c9"]
fig = make_subplots(rows=1, cols=3)
sentiment_list = ["positive", "neutral", "negative"]
sentiment_list2 = ["POS", "NEU", "NEG"]
for i in range(3):
texts = df[df["sentiment_label"] == sentiment_list2[i]]["text"]
texts_cleaned = process_texts(texts)
top_n_bigrams = get_top_ngram(texts_cleaned, 2)[:15]
x, y = map(list, zip(*top_n_bigrams))
        fig.add_trace(
            go.Bar(
                x=y,
                orientation="h",
                name=sentiment_list[i].title(),
                marker=dict(color=colors[i]),
                text=x,
                textposition="inside",
                # For horizontal bars the count is on the x axis; the n-gram itself is the bar text.
                hovertemplate="%{text}: %{x}",
            ),
            1,
            i + 1,
        )
fig.update_layout(
autosize=False,
margin=dict(t=0, b=0, l=0, r=0),
height=250,
)
return fig
def display_target_count(df):
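    """Show the distribution of sentiment labels as a pie chart and a bar chart side by side."""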
    counts = df.sentiment_label.value_counts()
    colors = ["#83c9ff", "#ff2b2b", "#0068c9"]
    fig = make_subplots(rows=1, cols=2, specs=[[{"type": "pie"}, {"type": "bar"}]])
    fig.add_trace(
        go.Pie(
            labels=counts.index,
            values=counts.values,
        ),
        1,
        1,
    )
    # Only the pie trace exists at this point, so the update applies to it alone.
    fig.update_traces(
        hoverinfo="label+percent",
        textfont_size=18,
        marker=dict(colors=colors, line=dict(color="#fff", width=1)),
    )
    fig.add_trace(
        go.Bar(
            x=counts.index,
            y=counts.values,
            marker_color=colors,
        ),
        1,
        2,
    )
fig.update_layout(
title_text="Análisis de Sentimientos",
title_y=1,
title_font=dict(color="#808495", size=15),
autosize=True,
height=250,
margin=dict(l=0, r=0, t=25, b=10),
xaxis=dict(visible=False),
yaxis=dict(visible=False),
)
return fig
def sentiment_over_date(df):
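    """Plot a stacked area chart of sentiment counts per day."""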
df = load_data(df)
grouped = df.groupby(["date", "sentiment_label"]).size().unstack(fill_value=0)
fig = go.Figure()
colors = ["#ff2b2b", "#83c9ff", "#0068c9"][::-1]
for idx, sentiment_label in enumerate(grouped.columns):
fig.add_trace(
go.Scatter(
x=grouped.index,
y=grouped[sentiment_label],
mode="lines",
name=sentiment_label.capitalize(),
stackgroup="one",
line=dict(width=2, color=colors[idx]),
fillcolor=colors[idx],
hoverinfo="y+name",
)
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.update_layout(
title={
"text": "Sentimiento a través del tiempo",
"x": 0.2,
"y": 1,
"xanchor": "center",
"yanchor": "top",
"font": {"size": 15, "color": "#808495", "family": "Arial"},
},
xaxis_title="Fecha",
yaxis_title="Conteo",
hovermode="x",
showlegend=True,
autosize=False,
height=250,
width=500,
margin=dict(l=0, r=0, t=40, b=0),
plot_bgcolor="white",
paper_bgcolor="white",
)
return fig
##############################################################################################################################
def crear_grafico_dispersion(df):
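    """Scatter plot relating each comment's like count to its sentiment label."""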
fig = px.scatter(
df,
x="likeCount",
y="sentiment_label",
color="sentiment_label",
labels={
"likeCount": "Número de Likes",
"sentiment_label": "Etiqueta de Sentimiento",
},
title="Relación entre Número de Likes y Etiquetas de Sentimiento",
)
fig.update_layout(
title_y=1,
title_font=dict(color="#808495", size=15),
autosize=True,
height=250,
margin=dict(l=0, r=0, t=20, b=0),
# xaxis=dict(visible=False),
# yaxis=dict(visible=False)
)
return fig
def bubble_fig(df):
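    """Bubble chart of how many users fall into each account-creation-time bucket (bubble size = user count)."""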
bubble_chart_data = (
df.groupby("account_creation_time").size().reset_index(name="user_count")
)
bubble_fig = px.scatter(
bubble_chart_data,
x="account_creation_time",
y="user_count",
size="user_count",
title="Tiempo de Creación de Cuenta
vs. Número de Usuarios",
labels={
"account_creation_time": "Tiempo de Creación de Cuenta (meses)",
"user_count": "Número de Usuarios",
},
)
return bubble_fig
def hist_fig(df):
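    """Histogram of account creation times (in months)."""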
hist_fig = px.histogram(
df,
x="account_creation_time",
title="Distribución del Tiempo de Creación de Cuenta",
labels={
"account_creation_time": "Tiempo de Creación de Cuenta (meses)",
"user_count": "Número de Usuarios",
},
nbins=25,
)
return hist_fig
def stacked_bar_fig(df):
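    """Stacked histogram of account creation times, colored by comment sentiment."""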
stacked_bar_fig = px.histogram(
df,
x="account_creation_time",
color="sentiment_label",
title="Distribución del Tiempo de
Creación de Cuenta por Sentimiento de Comentario",
labels={
"account_creation_time": "Tiempo de Creación de Cuenta (meses)",
"count": "Número de Usuarios",
"sentiment_beto": "Sentimiento",
},
barmode="stack",
nbins=25,
)
return stacked_bar_fig
def metrics_bar(tweet_data, df):
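    """Render account-age metrics (average, min, max) and the sentiment breakdown as Streamlit metric widgets."""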
avg_time = df["account_creation_time"].mean()
min_time = df["account_creation_time"].min()
max_time = df["account_creation_time"].max()
left, right = st.columns([2, 1])
with left:
with st.container(border=True):
# st.write("###### Analysis of Time Metrics")
col1, col2, col3 = st.columns(3)
col1.metric("Tiempo Promedio", f"{round(avg_time/12)} años")
col2.metric("### Tiempo Mínimo", f"{min_time} meses")
col3.metric("Tiempo Máximo", f"{round(max_time/12)} años")
with right:
with st.container(border=True):
# st.write("###### Sentiment Breakdown")
pos, neu, neg = st.columns(3)
# st.info(f"##### **Overall Sentiment**: :{TEXT_COLOR[tweet_data['overall_sentiment'].lower()]}[**{tweet_data['overall_sentiment']}**]")
pos.metric(label=":green[Positive]", value=tweet_data["positive"])
neu.metric(label=":gray[Neutral]", value=tweet_data["neutral"])
neg.metric(label=":red[Negative]", value=tweet_data["negative"])