import torch
import joblib
import streamlit as st
from newsfetch.news import newspaper
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder


def main():
    st.title("URL and Text Input App")

    # Get a URL (or raw article text) from the user.
    url_input = st.text_input("Enter URL:", "")

    def scrape_news_content(url):
        """Download an article with news-fetch and return its body text."""
        try:
            news_article = newspaper(url)
            print("scraped:", news_article)
            return news_article.article
        except Exception as e:
            return "Error: " + str(e)

    def summarize_with_t5(article_content, classification, model, tokenizer, device):
        """Summarize the article with T5, using a prompt that matches the predicted class."""
        article_content = str(article_content)
        if not article_content or article_content == "nan":
            return "", ""

        if classification == "risks":
            prompt = "summarize the key supply chain risks: "
        elif classification == "opportunities":
            prompt = "summarize the key supply chain opportunities: "
        else:
            # "neither" (or any unexpected label): nothing to summarize.
            st.write("This article is not classified as related to the supply chain.")
            return "None", "None"

        input_text = prompt + article_content
        # Truncate to T5's 512-token input limit to avoid overflowing the encoder.
        input_ids = tokenizer.encode(
            input_text, return_tensors="pt", max_length=512, truncation=True
        ).to(device)
        model = model.to(device)  # Move the model to the correct device.
        summary_ids = model.generate(
            input_ids,
            max_length=150,
            num_beams=4,
            length_penalty=2.0,
            early_stopping=True,
        )
        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        print(summary)

        st.write("This article is related to the supply chain.")
        if classification == "risks":
            return summary, "None"
        return "None", summary

    def classify_and_summarize(input_text, cls_model, tokenizer_cls, label_encoder,
                               model_summ, tokenizer_summ, device):
        if input_text.startswith("http"):
            # The input looks like a URL: scrape the article content.
            article_content = scrape_news_content(input_text)
        else:
            # Otherwise treat the input itself as the article content.
            article_content = input_text

        # Classify the article as risks / opportunities / neither.
        inputs_cls = tokenizer_cls(article_content, return_tensors="pt",
                                   max_length=512, truncation=True)
        inputs_cls = {key: value.to(device) for key, value in inputs_cls.items()}
        cls_model = cls_model.to(device)  # Move the classifier to the same device.
        outputs_cls = cls_model(**inputs_cls)
        logits_cls = outputs_cls.logits
        predicted_class = torch.argmax(logits_cls, dim=1).item()
        print("predicted_class:", predicted_class)
        classification = label_encoder.inverse_transform([predicted_class])[0]
        print("classification:", classification)

        # Summarize according to the predicted class.
        print("article_content:", article_content)
        summary_risk, summary_opportunity = summarize_with_t5(
            article_content, classification, model_summ, tokenizer_summ, device
        )

        if summary_risk is None:
            print("No risk summary generated.")
            summary_risk = "No risk summary available"
        if summary_opportunity is None:
            print("No opportunity summary generated.")
            summary_opportunity = "No opportunity summary available"

        return classification, summary_risk, summary_opportunity

    # Do nothing until the user has entered something.
    if not url_input:
        st.write("Enter a URL or article text above to classify and summarize it.")
        return

    print(url_input)
    cls_model = AutoModelForSequenceClassification.from_pretrained(
        "riskclassification_finetuned_xlnet_model_ld"
    )
    tokenizer_cls = AutoTokenizer.from_pretrained("xlnet-base-cased")

    # Rebuild the label encoder with the same label set used at fine-tuning time.
    # LabelEncoder sorts labels alphabetically (neither=0, opportunities=1, risks=2),
    # so this ordering must match the one used when the classifier was trained.
    label_encoder = LabelEncoder()
    label_column_values = ["risks", "opportunities", "neither"]
    label_encoder.fit(label_column_values)
    print("Label encoder classes:", label_encoder.classes_)

    # Persist the fitted encoder alongside the fine-tuned model.
    label_encoder_path = "riskclassification_finetuned_xlnet_model_ld/encoder_labels.pkl"
    joblib.dump(label_encoder, label_encoder_path)

    model_summ = T5ForConditionalGeneration.from_pretrained("t5-small")
    tokenizer_summ = T5Tokenizer.from_pretrained("t5-small")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    classification, summary_risk, summary_opportunity = classify_and_summarize(
        url_input, cls_model, tokenizer_cls, label_encoder,
        model_summ, tokenizer_summ, device
    )
    print("Classification:", classification)
    print("Risk Summary:", summary_risk)
    print("Opportunity Summary:", summary_opportunity)

    # Display the results in the app.
    st.write("Entered URL:", url_input)
    st.write("Classification:", classification)
    st.write("Risk Summary:", summary_risk)
    st.write("Opportunity Summary:", summary_opportunity)


if __name__ == "__main__":
    main()
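# Usage note (a sketch, assuming this file is saved as app.py): Streamlit apps
# are launched through the Streamlit CLI rather than the plain Python
# interpreter:
#
#   streamlit run app.py
#
# The XLNet classifier is loaded from the local directory
# "riskclassification_finetuned_xlnet_model_ld", while "xlnet-base-cased" and
# "t5-small" are fetched from the Hugging Face Hub on first use.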