import torch
from fastapi import FastAPI
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from transformers import DonutProcessor, VisionEncoderDecoderModel
from transformers import pipeline

# ASGI application object; the routes and the static mount in this module are
# registered on it.
app = FastAPI()

# FLAN-T5 (small) text2text-generation pipeline, constructed once at import time.
# NOTE(review): in the visible code it is only referenced from the commented-out
# lines inside the /infer_t5 handler -- confirm whether it is still needed.
pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")


@app.get("/infer_t5")
def t5(input):
    # GET /infer_t5 handler.
    #
    # NOTE(review): `input` is declared (so presumably still expected as a query
    # parameter) but is not used. The earlier FLAN-T5 path -- calling
    # pipe_flan(input) and returning {"output": output[0]["generated_text"]} --
    # is disabled; the route returns the document-type classification of a
    # fixed sample image instead.
    sample_image_url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg"
    return classify_acct_dtype_str(sample_image_url)


# Serve the "static" directory at the site root; html=True enables StaticFiles'
# HTML mode (presumably index.html as the default document -- see Starlette docs).
# NOTE(review): this mount is registered before the GET "/" route that follows.
# If routes are matched in registration order, requests for "/" are answered
# here rather than by index() -- confirm.
app.mount("/", StaticFiles(directory="static", html=True), name="static")

@app.get("/")
def index() -> FileResponse:
    # Return the landing page from its absolute location as an HTML response.
    index_page = "/app/static/index.html"
    return FileResponse(path=index_page, media_type="text/html")


# Document-type classifier: the Donut processor and the vision encoder-decoder
# model are both loaded from the same checkpoint at import time. They are the
# globals read by doctype_classify().
classifier_doctype_processor = DonutProcessor.from_pretrained("calumpianojericho/donutclassifier_acctdocs_by_doctype")
classifier_doctype_model = VisionEncoderDecoderModel.from_pretrained("calumpianojericho/donutclassifier_acctdocs_by_doctype")


"""### Inference Code"""

def inference(input, model, processor, threshold=1.0, task_prompt="", get_confidence=False):
    """Run an encoder-decoder document model on one image and parse its output.

    Args:
        input: Image to process. It is converted with ``np.array`` before being
            passed to ``processor`` (presumably a PIL image -- confirm with
            callers).
        model: Generation model exposing ``to``, ``generate`` and
            ``decoder.config.max_position_embeddings``.
        processor: Matching processor providing ``tokenizer``, ``batch_decode``
            and ``token2json``; calling it must yield ``pixel_values``.
        threshold: Minimum per-step top-token probability. Only used when
            ``get_confidence`` is true.
        task_prompt: Task start token, tokenized without special tokens and fed
            to the decoder as its prompt.
        get_confidence: When true, also return the confidence flag computed by
            ``pred_confidence``.

    Returns:
        The value returned by ``processor.token2json`` for the decoded
        sequence, or a ``(parsed, is_confident)`` tuple when
        ``get_confidence`` is true.
    """
    # Use the GPU when one is available; the model is moved on every call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids

    image = np.array(input)
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Greedy decoding (num_beams=1). Scores are requested so that the caller
    # can derive a confidence flag from the per-step logits.
    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id= processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
        output_scores=True,
    )

    # Only the first sequence of the batch is decoded.
    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token

    seq = processor.token2json(sequence)
    if get_confidence:
        return seq, pred_confidence(outputs.scores, threshold)

    return seq

def pred_confidence(output_scores, threshold):
    """Report whether every generation step was predicted confidently.

    For each entry of ``output_scores`` the logits of the first sequence
    (index 0) are turned into softmax probabilities, and the probability of
    the top-scoring token is compared against ``threshold``.

    Args:
        output_scores: Iterable of per-step score tensors; ``score[0]`` must
            support ``.cpu().numpy()``.
        threshold: Minimum top-token probability for a step to count as
            confident.

    Returns:
        ``False`` as soon as one step's top probability is below
        ``threshold``; ``True`` otherwise (including for empty
        ``output_scores``).
    """
    for score in output_scores:
        logits = score[0].cpu().numpy()
        # Shift by the maximum before exponentiating. The softmax value is
        # unchanged, but large logits can no longer overflow to inf; without
        # the shift inf / inf gives nan, and ``nan < threshold`` is False, so
        # an overflowing step was silently treated as confident.
        exp_scores = np.exp(logits - np.max(logits))
        prob_max = np.max(exp_scores) / np.sum(exp_scores)
        if prob_max < threshold:
            return False

    return True


# NOTE(review): this only binds a module-level Python variable; nothing in the
# visible code reads it. If the intent was CUDA's CUDA_LAUNCH_BLOCKING debugging
# switch, that is an environment variable and would presumably need to be set
# through os.environ before CUDA is initialised -- confirm intent.
CUDA_LAUNCH_BLOCKING=1
def parse_text(input, filename):
    """Run the base model on ``input`` and return the parsed result as a string.

    Uses the ``<s_synthdog>`` task prompt. ``filename`` is accepted but unused.

    NOTE(review): ``base_model`` and ``base_processor`` are not defined in the
    visible part of this file -- confirm they are provided elsewhere.
    """
    parsed = inference(
        input,
        base_model,
        base_processor,
        task_prompt="<s_synthdog>",
    )
    return str(parsed)

def doctype_classify(input, filename):
    """Classify the document type of ``input``.

    Runs the document-type classifier with a 0.90 confidence threshold.
    ``filename`` is accepted but unused.

    Returns:
        ``(predicted_class, is_confident)`` where ``predicted_class`` is the
        ``'class'`` entry of the parsed model output (``None`` if absent).
    """
    prediction, confident = inference(
        input,
        classifier_doctype_model,
        classifier_doctype_processor,
        threshold=0.90,
        task_prompt="<s_classifier_acct>",
        get_confidence=True,
    )
    predicted_class = prediction.get('class')
    return predicted_class, confident

def account_classify(input, filename):
    """Classify the account of ``input``.

    Runs the account classifier with a 0.999 confidence threshold.
    ``filename`` is accepted but unused.

    NOTE(review): ``classifier_account_model`` and
    ``classifier_account_processor`` are not defined in the visible part of
    this file -- confirm they are provided elsewhere.

    Returns:
        ``(predicted_class, is_confident)`` where ``predicted_class`` is the
        ``'class'`` entry of the parsed model output (``None`` if absent).
    """
    prediction, confident = inference(
        input,
        classifier_account_model,
        classifier_account_processor,
        threshold=0.999,
        task_prompt="<s_classifier_acct>",
        get_confidence=True,
    )
    predicted_class = prediction.get('class')
    return predicted_class, confident

"""## Text processing/string matcher code"""

import locale


def _utf8_preferred_encoding(do_setlocale=True):
    """Always report UTF-8 as the preferred encoding.

    ``do_setlocale`` is accepted only so that callers using the standard
    ``locale.getpreferredencoding(do_setlocale)`` signature keep working; it
    is ignored.
    """
    return "UTF-8"


# Override the stdlib lookup process-wide so that code asking for the preferred
# encoding gets UTF-8 (presumably a notebook-environment workaround -- confirm
# it is still needed). Applied once; the override was previously duplicated.
locale.getpreferredencoding = _utf8_preferred_encoding


"""## Classify Document Images"""

import numpy as np
import csv
import re
import os


import requests

def classify_acct_dtype_str(input_path):
    """Classify the document type of the image at ``input_path``.

    Args:
        input_path: URL or local path of the image to classify.

    Returns:
        The predicted class reported by ``doctype_classify``; the confidence
        flag is discarded.
    """
    # Imported locally so this function is self-contained; transformers is
    # already a dependency of this module.
    from transformers.image_utils import load_image

    # Load and decode the image named by the caller. Previously the argument
    # was ignored and a raw HTTP response object (not an image) was handed to
    # the classifier.
    image = load_image(input_path)
    dtype_inf, _dtype_conf = doctype_classify(image, os.path.basename(input_path))

    return dtype_inf

# NOTE(review): executed at import time -- every load of this module fetches the
# sample image and runs one classification, and the result is discarded. Looks
# like a leftover smoke test or warm-up call; confirm whether it should stay.
classify_acct_dtype_str("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg")