"""
#App: NLP App with Streamlit
Description
This is a Natural Language Processing (NLP) based application that is useful for
document/text summarization from Bangla images and English images/PDF files.
"""
# Core Pkgs
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
#os.system('sudo apt-get install tesseract-ocr-eng')
#os.system('sudo apt-get install tesseract-ocr-ben')

#os.system('wget https://github.com/tesseract-ocr/tessdata/raw/main/ben.traineddata')
#os.system('gunzip ben.traineddata.gz ')
#os.system('sudo mv -v ben.traineddata /usr/local/share/tessdata/')
#os.system('pip install -q pytesseract')
#os.system('conda install -c conda-forge poppler')
import streamlit as st
st.set_page_config(page_title="Summarization Tool", layout="wide", initial_sidebar_state="expanded")
st.title("Chatbot and Bangla/English Text Summarizer: Upload Images/Pdf or input texts to summarize!")
import torch
import docx2txt
from PIL import Image 
from PyPDF2 import PdfFileReader
from pdf2image import convert_from_bytes
import pdfplumber
#from line_cor import mark_region
import pdf2image
import requests
import cv2
import numpy as np
import pytesseract
import line_cor
import altair as alt
#pytesseract.pytesseract.tesseract_cmd = r"./Tesseract-OCR/tesseract.exe"
from PIL import Image
# Hugging Face Inference API endpoints and the authorization header sent with
# each of them.
#
# SECURITY: the bearer tokens are committed in plain text. Every token can be
# overridden through an environment variable so a deployment does not have to
# rely on the literal; the literals are kept only as a fallback so existing
# setups keep working. TODO: rotate these tokens and remove the fallbacks.

# Endpoint used by bansum().
API_URL0 = "https://api-inference.huggingface.co/models/csebuetnlp/mT5_multilingual_XLSum"
headers0 = {
    "Authorization": "Bearer "
    + (os.environ.get("HF_TOKEN_XLSUM") or "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")
}

# Endpoint used by engsum().
API_URL1 = "https://api-inference.huggingface.co/models/Michael-Vptn/text-summarization-t5-base"
headers1 = {
    "Authorization": "Bearer "
    + (os.environ.get("HF_TOKEN_T5") or "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")
}

# NOTE(review): API_URL2 / headers2 are not referenced in the visible part of
# this file; kept because other code may rely on them.
API_URL2 = "https://api-inference.huggingface.co/models/gpt2"
headers2 = {
    "Authorization": "Bearer "
    + (os.environ.get("HF_TOKEN_GPT2") or "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")
}
@st.cache
def read_pdf(file):
    """Extract the text of every page of a PDF into one string.

    Args:
        file: PDF source handed directly to ``PdfFileReader``.

    Returns:
        A string that starts with a single space, followed by the extracted
        text of each page in order, with one space appended after every page.
    """
    reader = PdfFileReader(file)
    page_texts = (
        reader.getPage(page_no).extractText() + " "
        for page_no in range(reader.numPages)
    )
    return " " + "".join(page_texts)
# def read_pdf_with_pdfplumber(file):
#     # Open the uploaded PDF file with pdfplumber
#     with pdfplumber.open(file) as pdf:
#         extracted_text = ''
#         for page in pdf.pages:
#             extracted_text += page.extract_text()

#     # Display the extracted text
#     #st.text(extracted_text)
#     return extracted_text

def engsum(output):
    """Summarize text through the ``API_URL1`` endpoint and display the result.

    Posts ``output`` to the Hugging Face Inference API and renders the
    returned ``generated_text`` in a Streamlit success box. Empty input,
    network failures, non-JSON replies, error replies and replies without any
    text are reported in the UI instead of crashing or being silently dropped.

    Args:
        output: The text to summarize.

    Returns:
        None. The summary (or a warning/error message) is rendered with
        Streamlit.
    """
    def query(payload):
        # The timeout keeps a stalled request from hanging the script run.
        response = requests.post(API_URL1, headers=headers1, json=payload, timeout=60)
        return response.json()

    if not output or not output.strip():
        st.warning("There is no text to summarize.")
        return

    try:
        # NOTE(review): the Inference API documents generation options under a
        # "parameters" object; "min_length" is sent at the top level exactly
        # as before -- confirm whether the service honours it there.
        out = query({
            "inputs": output,
            "min_length": 300
        })
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        st.error(f"Summarization request failed: {err}")
        return

    if isinstance(out, list) and out and isinstance(out[0], dict) and out[0].get("generated_text"):
        st.success(out[0]["generated_text"])
    elif isinstance(out, dict) and out.get("error"):
        # The service answers with {"error": ...} e.g. while a model loads.
        st.error(f"Summarization service error: {out['error']}")
    else:
        st.warning("The summarization service returned no text.")
def bansum(text):
    """Summarize text through the ``API_URL0`` endpoint and display the result.

    Posts ``text`` to the Hugging Face Inference API and renders the returned
    ``summary_text`` in a Streamlit success box. Empty input, network
    failures, non-JSON replies, error replies and replies without a summary
    are reported in the UI instead of crashing or being silently dropped.

    Args:
        text: The text to summarize.

    Returns:
        None. The summary (or a warning/error message) is rendered with
        Streamlit.
    """
    def query(payload):
        # The timeout keeps a stalled request from hanging the script run.
        response = requests.post(API_URL0, headers=headers0, json=payload, timeout=60)
        return response.json()

    if not text or not text.strip():
        st.warning("There is no text to summarize.")
        return

    try:
        # NOTE(review): the Inference API documents generation options under a
        # "parameters" object; "min_length" is sent at the top level exactly
        # as before -- confirm whether the service honours it there.
        out = query({"inputs": text, "min_length": 300})
    except (requests.RequestException, ValueError) as err:
        # ValueError covers a response body that is not valid JSON.
        st.error(f"Summarization request failed: {err}")
        return

    if isinstance(out, list) and out and isinstance(out[0], dict) and out[0].get("summary_text"):
        st.success(out[0]["summary_text"])
    elif isinstance(out, dict) and out.get("error"):
        # The service answers with {"error": ...} e.g. while a model loads.
        st.error(f"Summarization service error: {out['error']}")
    else:
        st.warning("The summarization service returned no text.")
    
# @st.cache
# def save(l):
#     return l
#@st.cache
def _load_image_for_ocr(image_file):
    """Decode an uploaded/captured image into a 3-channel BGR array in memory.

    Replaces the former save-to-"img.png"-then-``cv2.imread`` round trip, which
    shared one file on disk between all sessions of the app.
    """
    rgb = np.array(Image.open(image_file).convert("RGB"))
    # Keep the BGR channel order that cv2.imread used to hand to pytesseract.
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


def _summarize_image(image_file, bangla_label, english_label, column_widths):
    """Show Bangla/English buttons that OCR ``image_file`` and summarize it."""
    img = _load_image_for_ocr(image_file)
    st.text("Select the summarization type:")
    bangla_col, english_col = st.columns(column_widths)
    if bangla_col.button(bangla_label):
        text = pytesseract.image_to_string(img, lang="ben")
        st.subheader("সারাংশ/সারমর্ম")
        bansum(text)
    if english_col.button(english_label):
        text = pytesseract.image_to_string(img)
        st.subheader("Summarized Text")
        engsum(text)


def _summarize_pdf(pdf_file, chars_per_line=70):
    """Let the user pick a range of approximate lines of a PDF and summarize it.

    A "line" is approximated as ``chars_per_line`` characters of the extracted
    text; the same unit is used for the slider bounds and for both ends of the
    slice.
    """
    tet = read_pdf(pdf_file)
    # Ceiling division so the last partial line is selectable; at least 1 so
    # the slider never gets identical lower and upper bounds.
    total_lines = max(1, (len(tet) + chars_per_line - 1) // chars_per_line)
    first_line, last_line = st.slider(
        'Select a approximate number of lines to see and summarize',
        min_value=0,
        max_value=total_lines,  # explicit: the implicit maximum is 100
        value=(0, total_lines),
    )
    text = tet[first_line * chars_per_line:last_line * chars_per_line]
    if st.button("English Pdf Summarize"):
        if not text.strip():
            st.warning("The selected range contains no text.")
            return
        st.subheader("Selected text for summarize: ")
        st.success(text)
        st.subheader("Summarized Text: ")
        engsum(text)


def _summarize_message(message):
    """Show Bangla/English buttons that summarize the typed ``message``."""
    bangla_col, english_col = st.columns([2, 3])
    if bangla_col.button("Bangla"):
        bansum(message)
    if english_col.button("English"):
        engsum(message)


def _render_chatbot():
    """Render the Blenderbot chat section and keep its history in session state."""
    from streamlit_chat import message as st_message
    from transformers import BlenderbotTokenizer
    from transformers import BlenderbotForConditionalGeneration
    st.title("Chatbot!!!")

    # NOTE(review): newer Streamlit releases replace st.experimental_singleton
    # with st.cache_resource; kept as-is to match the Streamlit API level used
    # elsewhere in this file.
    @st.experimental_singleton
    def get_models():
        # it may be necessary for other frameworks to cache the model
        # seems pytorch keeps an internal state of the conversation
        model_name = "facebook/blenderbot-400M-distill"
        tokenizer = BlenderbotTokenizer.from_pretrained(model_name)
        model = BlenderbotForConditionalGeneration.from_pretrained(model_name)
        return tokenizer, model

    # Also repairs sessions in which the history was previously set to None.
    if not isinstance(st.session_state.get("history"), list):
        st.session_state.history = []

    def generate_answer():
        user_message = st.session_state.input_text
        if not user_message or not user_message.strip():
            return  # nothing to answer
        tokenizer, model = get_models()
        inputs = tokenizer(user_message, return_tensors="pt")
        result = model.generate(**inputs)
        message_bot = tokenizer.decode(result[0], skip_special_tokens=True)
        st.session_state.history.append({"message": user_message, "is_user": True})
        st.session_state.history.append({"message": message_bot, "is_user": False})

    def reset_history():
        # An empty list (not None) keeps the loop below and generate_answer()
        # working on the following reruns.
        st.session_state.history = []

    for index, chat in enumerate(st.session_state.history):
        # A unique key lets identical messages be rendered more than once.
        st_message(**chat, key=f"chat_message_{index}")
    st.text_input("Talk to the bot", key="input_text", on_change=generate_answer)
    # Callbacks run before the script reruns, so the chat is cleared before
    # the history is rendered again.
    st.button("Refresh/New Chat", on_click=reset_history)


def main():
    """Render the summarizer UI followed by the chatbot section.

    The first container offers three input sources -- typed text, a camera
    capture and an uploaded image/PDF -- and dispatches to the matching
    summarization helper once a photo/file widget has changed or text has been
    typed. An uploaded file takes precedence over a camera photo, which takes
    precedence over typed text. The second container hosts the chatbot.
    """
    camera_photo = None
    if "photo" not in st.session_state:
        st.session_state["photo"] = "not done"

    def change_photo_state():
        st.session_state["photo"] = "done"

    with st.container():
        c1, c2, c3 = st.columns([2, 2, 1])
        message = c1.text_input("Type your text here!")
        # capture is reset on every script run, so "Stop Camera" hides the
        # camera widget only for the run triggered by the click.
        capture = True
        if c2.button("Stop Camera"):
            capture = False
        if capture:
            camera_photo = c2.camera_input("Capture a photo to summarize: ", on_change=change_photo_state)
            # camera_input returns None until a photo has been taken.
            if camera_photo is not None:
                st.image(camera_photo, caption='Uploaded Image', use_column_width=True)

        uploaded_photo = c3.file_uploader("Upload your Images/PDF", type=['jpg', 'png', 'jpeg', 'pdf'], on_change=change_photo_state)
        if st.session_state["photo"] == "done" or message:
            if uploaded_photo and uploaded_photo.type == 'application/pdf':
                _summarize_pdf(uploaded_photo)
            elif uploaded_photo:
                _summarize_image(uploaded_photo, "BENGALI", "ENGLISH", [2, 2])
            elif camera_photo:
                _summarize_image(camera_photo, "Bangla", "English", [2, 3])
            else:
                _summarize_message(message)

    with st.container():
        _render_chatbot()
            
if __name__ == "__main__":
    main()