from typing import List

from fastapi import FastAPI, File, UploadFile
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from transformers import pipeline

app = FastAPI()

pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")


@app.get("/infer_t5")
def t5(input: str):
    output = pipe_flan(input)
    return {"output": output[0]["generated_text"]}


# Earlier single-file version, kept for reference:
# @app.post("/classify/")
# async def classify_doc(file: UploadFile):
#     return {"file_size": len(await file.read())}
@app.post("/classify/")
async def classify_doc(files: List[UploadFile] = File(...)):
    results = []
    for file in files:
        try:
            contents = file.file.read()
            results.append(classify_acct_dtype_str(contents))
        except Exception:
            return {"message": "There was an error uploading the file(s)"}
        finally:
            file.file.close()
    return {"message": f"Successfully classified {results}"}

@app.get("/")
def index() -> FileResponse:
    return FileResponse(path="/app/static/index.html", media_type="text/html")

# Mount the static directory last so the API routes above take precedence.
app.mount("/", StaticFiles(directory="static", html=True), name="static")




import os
import re

import numpy as np
import torch
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel


# Doc classifier model
classifier_doctype_processor = DonutProcessor.from_pretrained("calumpianojericho/donutclassifier_acctdocs_by_doctype")
classifier_doctype_model = VisionEncoderDecoderModel.from_pretrained("calumpianojericho/donutclassifier_acctdocs_by_doctype")


"""### Inference Code"""

def inference(input, model, processor, threshold=1.0, task_prompt="", get_confidence=False):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids

    # Convert the PIL image to a numpy array before feature extraction.
    image = np.array(input)
    pixel_values = processor(image, return_tensors="pt").pixel_values

    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
        output_scores=True,
    )

    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token

    seq = processor.token2json(sequence)
    if get_confidence:
        return seq, pred_confidence(outputs.scores, threshold)

    return seq
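# Example usage (sketch; assumes a PIL image `img` is available): the task
# prompt seeds the decoder and selects which task the model performs.
#   seq, ok = inference(img, classifier_doctype_model, classifier_doctype_processor,
#                       threshold=0.90, task_prompt="<s_classifier_acct>", get_confidence=True)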

def pred_confidence(output_scores, threshold):
    """Return True only if every generated token's top probability meets the threshold."""
    is_confident = True

    for score in output_scores:
        exp_scores = np.exp(score[0].cpu().numpy())  # scores are logits; exponentiate so all values are positive
        sum_exp = np.sum(exp_scores)                 # normalizing constant over the vocabulary
        idx = np.argmax(exp_scores)                  # index of the highest-scoring token
        prob_max = exp_scores[idx] / sum_exp         # softmax probability of that token
        if prob_max < threshold:
            is_confident = False

    return is_confident
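# Note: the loop above computes a softmax over the raw logits. A numerically
# safer equivalent (sketch) would be:
#   prob_max = torch.softmax(score[0], dim=-1).max().item()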


os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # make CUDA errors synchronous for easier debugging
def parse_text(input, filename):
    # NOTE: base_model and base_processor are not defined in this file; they are
    # presumably a Donut base model/processor loaded elsewhere.
    model = base_model
    processor = base_processor
    seq = inference(input, model, processor, task_prompt="<s_synthdog>")
    return str(seq)

def doctype_classify(input, filename):
    model = classifier_doctype_model
    processor = classifier_doctype_processor
    seq, is_confident = inference(input, model, processor, threshold=0.90, task_prompt="<s_classifier_acct>", get_confidence=True)
    return seq.get('class'), is_confident

def account_classify(input, filename):
    # NOTE: classifier_account_model and classifier_account_processor are not
    # defined in this file and must be loaded elsewhere.
    model = classifier_account_model
    processor = classifier_account_processor
    seq, is_confident = inference(input, model, processor, threshold=0.999, task_prompt="<s_classifier_acct>", get_confidence=True)
    return seq.get('class'), is_confident



"""## Text processing/string matcher code"""

import locale
locale.getpreferredencoding = lambda: "UTF-8"  # Colab workaround: force UTF-8 as the preferred encoding


"""## Classify Document Images"""

import requests
from io import BytesIO

def classify_acct_dtype_str(image_bytes):
    """Classify a document image from its raw bytes and return the predicted class."""
    ipt = Image.open(BytesIO(image_bytes))
    dtype_inf, dtype_conf = doctype_classify(ipt, "upload")
    return dtype_inf

# Example (sketch): fetch a sample image over HTTP and classify its bytes.
#   response = requests.get("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg")
#   print(classify_acct_dtype_str(response.content))