import os

# Disable tokenizers parallelism to avoid deadlocks
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import streamlit as st
import requests
from bs4 import BeautifulSoup
from fpdf import FPDF
from tqdm import tqdm
from summarizer import Summarizer


@st.cache_data
def parse_html_file(file_path):
    """Read a saved HTML file and return a BeautifulSoup parse tree."""
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            html_content = file.read()
        soup = BeautifulSoup(html_content, "html.parser")
        return soup
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


@st.cache_data
def scrape_amazon_product(url):
    """Download an Amazon product page and extract its name, category,
    description, and review texts."""
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/122.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    }
    try:
        response = requests.get(url, headers=HEADERS)
        if response.status_code == 200:
            with open("temp.html", 'wb') as file:
                file.write(response.content)
        else:
            print(f"Failed to download HTML. Status code: {response.status_code}")
            return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

    file_path = os.path.join(os.getcwd(), "temp.html")
    soup = parse_html_file(file_path)
    if soup is None:
        return None

    product_name_element = soup.find('span', {'id': 'productTitle'})
    product_name = product_name_element.text.strip() if product_name_element else None

    categories = soup.find_all('a', {'class': 'a-link-normal a-color-tertiary'})
    category = categories[-1].text.strip() if categories else None

    product_description_element = soup.find('div', {'id': 'productDescription'})
    product_description = product_description_element.text.strip() if product_description_element else None

    review_elements = soup.find_all('span', {'data-hook': 'review-body'})
    reviews = [review.text.strip() for review in review_elements]

    return {
        'product_name': product_name,
        'category': category,
        'product_description': product_description,
        'Reviews': reviews
    }


def CalcReviews(reviews):
    """Split reviews into positive and negative buckets based on a one-sentence
    extractive summary, then summarize each bucket."""
    positive_reviews = []
    negative_reviews = []
    model = Summarizer()

    for review in tqdm(reviews):
        summary = model(review, num_sentences=1)
        if "good" in summary or "great" in summary:
            positive_reviews.append(review)
        else:
            negative_reviews.append(review)

    ratio = len(positive_reviews) / (len(negative_reviews) if negative_reviews else 1)
    positive_summary = model("\n".join(positive_reviews), num_sentences=3)
    negative_summary = model("\n".join(negative_reviews), num_sentences=3)

    return {
        'positive_reviews': positive_reviews,
        'negative_reviews': negative_reviews,
        'Ratio of Positive to Negative Reviews': ratio,
        'positive_summary': positive_summary,
        'negative_summary': negative_summary
    }


def generate_pdf(product_data, review_data):
    """Render the scraped product data and review summaries into a PDF report."""
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.cell(200, 10, txt="Product Report", ln=True, align="C")
    pdf.cell(200, 10, txt=f"Product Name: {product_data['product_name']}", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Category: {product_data['category']}", ln=True, align="L")
    pdf.cell(200, 10, txt="Product Description:", ln=True, align="L")
    pdf.multi_cell(0, 10, txt=product_data['product_description'] or "")

    pdf.cell(200, 10, txt="Reviews Summary", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Number of Positive Reviews: {len(review_data['positive_reviews'])}", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Number of Negative Reviews: {len(review_data['negative_reviews'])}", ln=True, align="L")
    pdf.cell(200, 10, txt=f"Positive to Negative Ratio: {review_data['Ratio of Positive to Negative Reviews']}", ln=True, align="L")
    pdf.cell(200, 10, txt="Summary of Positive Reviews", ln=True, align="L")
    pdf.multi_cell(0, 10, txt=review_data['positive_summary'])
    pdf.cell(200, 10, txt="Summary of Negative Reviews", ln=True, align="L")
    pdf.multi_cell(0, 10, txt=review_data['negative_summary'])

    pdf_file_path = "product_report.pdf"
    pdf.output(pdf_file_path)
    return pdf_file_path


def get_answer(question, pdf_path):
    """Upload the report PDF to ChatPDF and ask a question about it."""
    auth_headers = {'Authorization': 'Bearer YOUR_API_KEY'}

    # Upload the PDF; let requests set the multipart Content-Type itself.
    with open(pdf_path, 'rb') as pdf_file:
        response1 = requests.post(
            'https://api.chatpdf.com/v1/sources',
            headers=auth_headers,
            files={'file': pdf_file})
    if response1.status_code == 200:
        source_id = response1.json()['sourceId']
    else:
        st.error("Failed to upload PDF to ChatPDF.")
        return None

    data = {
        'sourceId': source_id,
        'messages': [
            {
                'role': "user",
                'content': question,
            }
        ]
    }
    response = requests.post(
        'https://api.chatpdf.com/v1/chats/message',
        headers={**auth_headers, 'Content-Type': 'application/json'},
        json=data)
    if response.status_code == 200:
        return response.json()['content']
    else:
        st.error("Failed to get response from ChatPDF.")
        return None


# Streamlit application
st.title("Amazon Product Insights Dashboard")

# URL input
url = st.text_input("Enter Amazon Product URL:")
if url:
    product_data = scrape_amazon_product(url)
    if product_data:
        st.header(product_data['product_name'])
        st.subheader("Product Description")
        st.write(product_data['product_description'])
        st.subheader("Reviews")
        st.write(product_data['Reviews'])

        review_data = CalcReviews(product_data['Reviews'])
        st.metric("Number of Positive Reviews", len(review_data['positive_reviews']))
        st.metric("Number of Negative Reviews", len(review_data['negative_reviews']))
        st.write(f"Positive to Negative Ratio: {review_data['Ratio of Positive to Negative Reviews']:.2f}")
        st.subheader("Summary of Positive Reviews")
        st.write(review_data['positive_summary'])
        st.subheader("Summary of Negative Reviews")
        st.write(review_data['negative_summary'])

        # Generate PDF report from the scraped data
        pdf_path = generate_pdf(product_data, review_data)

        # Chatbot interaction
        st.subheader("Chat with the Product")
        user_question = st.text_input("Ask a question about the product:")
        if user_question:
            response = get_answer(user_question, pdf_path)
            st.write(response)