import streamlit as st
import time
from annotated_text import annotated_text
from io import StringIO
from transformers import AutoTokenizer, AutoModelForTokenClassification
import os
from streamlit_text_annotation import text_annotation

# NOTE(review): presumably set so the OpenMP runtime does not abort when it is
# loaded more than once by different native libraries — confirm it is still
# needed for the deployed environment.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import plotly.express as px
from streamlit_option_menu import option_menu

# Use the "wide" page layout. This is the first Streamlit command issued by
# this script; keep it ahead of any other `st.*` call.
st. set_page_config(layout="wide")

from transformers import pipeline
import pandas as pd

# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_resource` — confirm against the Streamlit version in use.
@st.cache(allow_output_mutation=True)
def init_text_summarization_model():
    """Build the summarization pipeline for ``facebook/bart-large-cnn``.

    Returns:
        The object returned by ``pipeline("summarization", ...)``.
    """
    return pipeline("summarization", model='facebook/bart-large-cnn')

# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_resource` — confirm against the Streamlit version in use.
@st.cache(allow_output_mutation=True)
def init_zsl_topic_classification():
    """Build the zero-shot classification pipeline and its hypothesis template.

    This function used to be defined twice with identical bodies; the second
    definition simply replaced the first, so a single definition is kept.

    Returns:
        A ``(pipe, template)`` pair: the ``zero-shot-classification`` pipeline
        for ``facebook/bart-large-mnli`` and the format string
        ``"This text is about {}."``.
    """
    MODEL = 'facebook/bart-large-mnli'
    pipe = pipeline("zero-shot-classification", model=MODEL)
    template = "This text is about {}."
    return pipe, template

# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_resource` — confirm against the Streamlit version in use.
@st.cache(allow_output_mutation=True)
def init_ner_pipeline():
    """Build the NER pipeline for the ``d4data/biomedical-ner-all`` checkpoint.

    The tokenizer and the token-classification model are loaded explicitly
    and handed to ``pipeline`` with ``aggregation_strategy="simple"``.

    Returns:
        The object returned by ``pipeline("ner", ...)``.
    """
    ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
    ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
    # Pass device=0 as an extra argument when running on a GPU.
    return pipeline(
        "ner",
        model=ner_model,
        tokenizer=ner_tokenizer,
        aggregation_strategy="simple",
    )

# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_resource` — confirm against the Streamlit version in use.
@st.cache(allow_output_mutation=True)
def init_qa_pipeline():
    """Build the question-answering pipeline for ``deepset/roberta-base-squad2``.

    Returns:
        The object returned by ``pipeline("question-answering", ...)``.
    """
    return pipeline("question-answering", model='deepset/roberta-base-squad2')

# Background colour per entity group, used when highlighting entities.
_ENTITY_COLOURS = {
    'Coreference': '#29D93B',
    'Severity': '#FCF3CF',
    'Sex': '#E9F7EF',
    'Sign_symptom': '#EAF2F8',
    'Detailed_description': '#078E8B',
    'Date': '#F5EEF8',
    'History': '#FDEDEC',
    'Medication': '#F4F6F6',
    'Therapeutic_procedure': '#A3E4D7',
    'Age': '#85C1E9',
    'Subject': '#D7BDE2',
    'Biological_structure': '#AF7AC5',
    'Activity': '#B2BABB',
    'Lab_value': '#E6B0AA',
    'Family_history': '#2471A3',
    'Diagnostic_procedure': '#CCD1D1',
    'Other_event': '#239B56',
    'Occupation': '#B3B6B7',
}

# Fallback colour for entity groups that have no entry in the map above, so an
# unexpected label is still rendered instead of raising KeyError.
_DEFAULT_ENTITY_COLOUR = '#D5D8DC'


def get_formatted_text_for_annotation(output, text=None):
    """Split ``text`` into plain and labelled segments.

    The segments, concatenated in order, reproduce ``text`` exactly: no
    character between, before or after the entities is dropped or repeated.

    Args:
        output: Iterable of entity dicts. Each dict is read for ``'start'``
            and ``'end'`` (character offsets into ``text``) and
            ``'entity_group'`` (the label). Entities are assumed to be in
            ascending, non-overlapping order.
        text: The string the offsets refer to. When omitted, the module-level
            variable ``text`` is used, which is what the original
            single-argument call relied on.

    Returns:
        A tuple whose items are either a plain ``str`` (unlabelled text) or a
        ``(snippet, label, colour)`` triple for an entity.

    Raises:
        NameError: If ``text`` is omitted and no module-level ``text`` exists.
    """
    if text is None:
        # Backward compatibility with callers that only pass the entities.
        try:
            text = globals()["text"]
        except KeyError:
            raise NameError("name 'text' is not defined") from None

    segments = []
    cursor = 0  # offset of the first character not yet emitted
    for entity in output:
        start, end = entity['start'], entity['end']
        if start > cursor:
            # Unlabelled gap between the previous entity (or the beginning of
            # the text) and this entity.
            segments.append(text[cursor:start])
        label = entity['entity_group']
        colour = _ENTITY_COLOURS.get(label, _DEFAULT_ENTITY_COLOUR)
        segments.append((text[start:end], label, colour))
        # Always advance past the entity, including when it started exactly
        # at the cursor.
        cursor = max(cursor, end)

    if cursor < len(text):
        # Trailing unlabelled text, through the final character.
        segments.append(text[cursor:])

    return tuple(segments)
    
# Model initialization
# Every page below uses these module-level pipelines. Each init_* helper is
# decorated with st.cache, so the models are presumably loaded once and reused
# across reruns — confirm against the Streamlit version in use.
# `template` is not referenced anywhere else in the visible script.
pipeline_summarization = init_text_summarization_model()
pipeline_zsl, template = init_zsl_topic_classification()
pipeline_ner =init_ner_pipeline()
pipeline_qa = init_qa_pipeline()

st.header("Intelligent Document Automation")

# Sidebar navigation: the chosen entry is stored in `selected_menu` and
# decides which page is rendered further down.
with st.sidebar:
    menu_entries = [
        "Upload Document",
        "Extract Text",
        "Summarize Document",
        "Extract Entities",
        "Detected Barriers",
        "Get Answers",
        "Annotation Tool",
    ]
    selected_menu = option_menu(
        "Select Option",
        menu_entries,
        menu_icon="cast",
        default_index=0,
    )
    

# Render the page that matches the sidebar selection.
# NOTE(review): get_text_from_ocr_engine, get_paragraphs_for_summaries and
# get_paragraphs_for_entities are neither defined nor imported in the visible
# part of this file — confirm they are available at runtime.
if selected_menu == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file")
    if uploaded_file is not None:
        # NOTE(review): the uploaded file is not passed to the OCR helper and
        # `ocr_text` is not read again in the visible script — confirm intent.
        ocr_text = get_text_from_ocr_engine()
        st.write("Upload Successful")

elif selected_menu == "Extract Text":
    with st.spinner("Extracting Text..."):
        # Fixed 6-second pause before the text is shown.
        time.sleep(6)
        st.write(get_text_from_ocr_engine())

elif selected_menu == "Summarize Document":
    paragraphs = get_paragraphs_for_summaries()

    with st.spinner("Finding Topics..."):
        # The topic tags are a hard-coded list; no model is called here.
        tags_found = ["Injury Details", "Past Medical Conditions", "Injury Management Plan", "GP Correspondence"]
        time.sleep(5)
        st.write("This document is about:")
        # Show the tags as one bold line. The bold markers have to be balanced
        # and must touch the text, otherwise Markdown prints them literally.
        tag_line = ";".join(["#" + tag + " " for tag in tags_found]).rstrip()
        st.markdown("**" + tag_line + "**")
        st.markdown("""---""")

    with st.spinner("Summarizing Document..."):
        # One summary per paragraph, each followed by a horizontal rule.
        for paragraph in paragraphs:
            summary = pipeline_summarization(paragraph, max_length=130, min_length=30, do_sample=False)
            st.write(summary[0]['summary_text'])
            st.markdown("""---""")

elif selected_menu == "Extract Entities":
    paragraphs = get_paragraphs_for_entities()

    with st.spinner("Extracting Entities..."):
        # The loop variable must stay named `text`:
        # get_formatted_text_for_annotation is called with the entities only
        # and reads the module-level `text` to slice the paragraph.
        for text in paragraphs:
            output = pipeline_ner(text)
            entities_text = get_formatted_text_for_annotation(output)
            annotated_text(*entities_text)
            st.markdown("""---""")

elif selected_menu == "Detected Barriers":
    # Barrier title -> question put to the QA pipeline.
    barriers_to_detect = {
        "Chronic Pain": "Is the patient experiencing chronic pain?",
        "Mental Health Issues": "Does he have any mental issues?",
        "Prior History": "What is prior medical history?",
        "Smoking": "Does he smoke?",
        "Drinking": "Does he drink?",
        "Comorbidities": "Does he have any comorbidities?",
    }

    with st.spinner("Detecting Barriers..."):
        # The document text is the same for every question, so fetch it once
        # instead of once per barrier.
        context = get_text_from_ocr_engine()
        for barrier, question_text in barriers_to_detect.items():
            result = pipeline_qa(question=question_text, context=context)
            st.subheader(barrier)
            # Answers scoring below 0.3 are reported as "Not Found".
            if result['score'] < 0.3:
                st.text("Not Found")
            else:
                st.text(result['answer'])

elif selected_menu == "Get Answers":
    st.subheader('Question')
    question_text = st.text_input("Type your question")
    context = get_text_from_ocr_engine()

    # Nothing is asked until the user has typed a question.
    if question_text:
        with st.spinner("Finding Answer(s)..."):
            result = pipeline_qa(question=question_text, context=context)
            st.subheader('Answer')
            st.text(result['answer'])

elif selected_menu == "Annotation Tool":
    # Read-only sample shown twice: vertical and horizontal label layout.
    # NOTE(review): the "dog" token carries a "Pet" label that is not in the
    # "labels" list — confirm whether that is intended.
    data1 = {
        "tokens": [
            {"text": "He", "labels": ["Person"]},
            {"text": "loves"},
            {"text": "his"},
            {"text": "dog", "labels": ["Animal", "Pet"]},
        ],
        "labels": [
            {"text": "Person"},
            {"text": "Action"},
            {"text": "Animal"},
        ],
    }

    st.subheader("Display Mode:")
    left, right = st.columns(2)
    with left:
        st.text("Vertical labels:")
        text_annotation(data1)
    with right:
        st.text("Horizontal labels:")
        # Same dict, switched to horizontal after the first widget was created.
        data1["labelOrientation"] = "horizontal"
        text_annotation(data1)

    # Editable sample with per-label styling.
    # NOTE(review): the "loves" token uses an "Action" label that is not in
    # the "labels" list below — confirm whether that is intended.
    data2 = {
        "allowEditing": True,
        "tokens": [
            {"text": "He", "labels": ["Pronoun", "Person"]},
            {"text": "loves", "labels": ["Action"]},
            {"text": "his"},
            {"text": "dog", "labels": ["Animal"]},
        ],
        "labels": [
            {"text": "Pronoun", "style": {
                "color": "red",
                "background-color": "white",
                "font-size": "8px",
                "border": "3px dashed red",
            }},
            {"text": "Verb", "style": {
                "color": "green",
                "background-color": "white",
                "font-size": "8px",
                "font-weight": "900",
            }},
            {"text": "Noun", "style": {
                "color": "blue",
                "background-color": "white",
                "font-size": "8px",
            }},
            {"text": "Person"},
            {"text": "Animal"},
        ],
    }

    st.subheader("Edit Mode:")
    data = text_annotation(data2)
    if data:
        # Written explicitly instead of relying on Streamlit "magic" for a
        # bare expression, so it still renders when magic is disabled.
        st.write("Returned data:", data)