from predict import run_prediction
from io import StringIO
import json
import gradio as gr
import spacy
from spacy import displacy
from transformers import RobertaTokenizer,pipeline
import torch
import nltk
from nltk.tokenize import sent_tokenize
from fin_readability_sustainability import BERTClass, do_predict
import pandas as pd
import en_core_web_sm
from score_fincat import score_fincat
from sus_fls import get_sustainability,fls
nlp = en_core_web_sm.load()
nltk.download('punkt')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


##Summarization
summarizer = pipeline("summarization", model="knkarthick/MEETING_SUMMARY") 
def summarize_text(text):
    resp = summarizer(text)
    stext = resp[0]['summary_text']
    return stext

    
##Company Extraction
ner=pipeline('ner',model='Jean-Baptiste/camembert-ner-with-dates',tokenizer='Jean-Baptiste/camembert-ner-with-dates', aggregation_strategy="simple")
def fin_ner(text):
    replaced_spans = ner(text)
    new_spans=[]
    for item in replaced_spans:
        item['entity']=item['entity_group']
        del item['entity_group']
        new_spans.append(item)
    return {"text": text, "entities": new_spans}
    
     
#CUAD STARTS    
def load_questions():
    questions = []
    with open('questions.txt') as f:
        questions = f.readlines()
    return questions


def load_questions_short():
    questions_short = []
    with open('questionshort.txt') as f:
        questions_short = f.readlines()
    return questions_short
questions = load_questions()
questions_short = load_questions_short()


def quad(query,file):
    with open(file.name) as f:
        paragraph = f.read()
    questions = load_questions()
    questions_short = load_questions_short()
    if (not len(paragraph)==0) and not (len(query)==0):
        print('getting predictions')
    predictions = run_prediction([query], paragraph, 'marshmellow77/roberta-base-cuad',n_best_size=5)
    answer = ""
    answer_p=""
    if predictions['0'] == "":
        answer = 'No answer found in document'
    else:
        with open("nbest.json") as jf:
            data = json.load(jf)
            for i in range(1):
                raw_answer=data['0'][i]['text']
                answer += f"{data['0'][i]['text']} -- \n"
                answer_p =answer+ f"Probability: {round(data['0'][i]['probability']*100,1)}%\n\n"            
    return answer_p,summarize_text(answer),fin_ner(answer),score_fincat(answer),get_sustainability(answer),fls(answer)    
                
                   
iface = gr.Interface(fn=quad, inputs=[gr.Dropdown(choices=questions_short,label='SEARCH QUERY'),gr.inputs.File(label='TXT FILE')], title="CONBERT",description="CONTRACT REVIEW TOOL",article='Article', outputs=[gr.outputs.Textbox(label='Answer'),gr.outputs.Textbox(label='Summary'),gr.HighlightedText(label='NER'),gr.HighlightedText(label='CLAIM'),gr.HighlightedText(label='SUSTAINABILITY'),gr.HighlightedText(label='FLS')], allow_flagging="never")


iface.launch()