Spaces:

ConradLax
/

document_classifier

Runtime error

App Files Files Community

ConradLax committed on Dec 18, 2023

Commit

53a3910

1 Parent(s): fdcdc66

test: classification model

Browse files

Files changed (1) hide show

main.py +114 -5

main.py CHANGED Viewed

@@ -11,8 +11,9 @@ pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")
 @app.get("/infer_t5")
 def t5(input):
-    output = pipe_flan(input)
-    return {"output": output[0]["generated_text"]}
 app.mount("/", StaticFiles(directory="static", html=True), name="static")
@@ -22,6 +23,114 @@ def index() -> FileResponse:
     return FileResponse(path="/app/static/index.html", media_type="text/html")
-#@app.get("/")
-#def read_root():
-#    return {"Hello": "World!"}

 @app.get("/infer_t5")
 def t5(input):
+    # Previous text2text path, kept commented out:
+    # output = pipe_flan(input)
+    # return {"output": output[0]["generated_text"]}
+    # NOTE(review): `input` is not used below; the route always classifies
+    # this fixed sample image URL and returns that result.
+    return classify_acct_dtype_str("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg")
 app.mount("/", StaticFiles(directory="static", html=True), name="static")
     return FileResponse(path="/app/static/index.html", media_type="text/html")
+# Document-type classifier: processor and model are loaded from the same checkpoint id.
+_DOCTYPE_CHECKPOINT = "calumpianojericho/donutclassifier_acctdocs_by_doctype"
+classifier_doctype_processor = DonutProcessor.from_pretrained(_DOCTYPE_CHECKPOINT)
+classifier_doctype_model = VisionEncoderDecoderModel.from_pretrained(_DOCTYPE_CHECKPOINT)
+"""### Inference Code"""
+def inference(input, model, processor, threshold=1.0, task_prompt="", get_confidence=False):
+    """Generate a token sequence for one image and parse it with ``processor.token2json``.
+
+    Args:
+        input: Image-like object; it is converted with ``np.array`` before
+            being handed to ``processor``.
+        model: Model exposing ``to``, ``generate`` and
+            ``decoder.config.max_position_embeddings``.
+        processor: Callable processor exposing ``tokenizer``, ``batch_decode``
+            and ``token2json``.
+        threshold: Forwarded to ``pred_confidence``; only used when
+            ``get_confidence`` is true.
+        task_prompt: Prompt string tokenized (without special tokens) into the
+            initial decoder input ids.
+        get_confidence: When true, also return the confidence flag.
+
+    Returns:
+        The parsed output of ``processor.token2json``, or a tuple of that
+        output and ``pred_confidence(outputs.scores, threshold)`` when
+        ``get_confidence`` is true.
+    """
+    # Use the GPU when torch reports one, otherwise stay on CPU.
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.to(device)
+    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
+    image = np.array(input)
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+    outputs = model.generate(
+        pixel_values.to(device),
+        decoder_input_ids=decoder_input_ids.to(device),
+        max_length=model.decoder.config.max_position_embeddings,
+        early_stopping=True,
+        pad_token_id=processor.tokenizer.pad_token_id,
+        eos_token_id=processor.tokenizer.eos_token_id,
+        use_cache=True,
+        num_beams=1,
+        # Never emit the unknown token.
+        bad_words_ids=[[processor.tokenizer.unk_token_id]],
+        # Per-step scores are required by pred_confidence.
+        return_dict_in_generate=True,
+        output_scores=True,
+    )
+    sequence = processor.batch_decode(outputs.sequences)[0]
+    # Strip end-of-sequence and padding markers from the decoded text.
+    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
+    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()  # remove first task start token
+    seq = processor.token2json(sequence)
+    if get_confidence:
+        return seq, pred_confidence(outputs.scores, threshold)
+    return seq
+def pred_confidence(output_scores, threshold):
+    """Report whether every generation step was predicted with enough probability.
+
+    Args:
+        output_scores: Iterable of per-step score tensors; row ``0`` of each
+            tensor is read as the logits for that step.
+        threshold: Minimum softmax probability the top token must reach.
+
+    Returns:
+        ``False`` as soon as one step's top-token probability is below
+        ``threshold``; ``True`` otherwise (including for an empty iterable).
+    """
+    for score in output_scores:
+        logits = score[0].cpu().numpy().astype(np.float64)
+        # Subtract the max logit before exponentiating. exp() of a large raw
+        # logit overflows to inf, and inf/inf is NaN, which compares False
+        # against the threshold and would be reported as "confident".
+        exp_scores = np.exp(logits - np.max(logits))
+        prob_max = np.max(exp_scores) / np.sum(exp_scores)
+        if prob_max < threshold:
+            return False
+    return True
+# NOTE(review): this is a plain module-level Python assignment; it does not
+# write to os.environ. If the intent was to enable the CUDA_LAUNCH_BLOCKING
+# environment variable, an explicit environment write is needed — confirm.
+CUDA_LAUNCH_BLOCKING=1
+def parse_text(input, filename):
+    """Run ``inference`` with the base model and the ``<s_synthdog>`` prompt; return the result as ``str``.
+
+    ``filename`` is accepted but not used.
+    """
+    parsed = inference(input, base_model, base_processor, task_prompt="<s_synthdog>")
+    return str(parsed)
+def doctype_classify(input, filename):
+    """Classify ``input`` with the document-type model.
+
+    Returns a ``(class, is_confident)`` pair: the ``'class'`` entry of the
+    parsed model output and the confidence flag computed at threshold 0.90.
+    ``filename`` is accepted but not used.
+    """
+    parsed, confident = inference(
+        input,
+        classifier_doctype_model,
+        classifier_doctype_processor,
+        threshold=0.90,
+        task_prompt="<s_classifier_acct>",
+        get_confidence=True,
+    )
+    return parsed.get("class"), confident
+def account_classify(input, filename):
+    """Classify ``input`` with the account model.
+
+    Returns a ``(class, is_confident)`` pair: the ``'class'`` entry of the
+    parsed model output and the confidence flag computed at threshold 0.999.
+    ``filename`` is accepted but not used.
+    """
+    parsed, confident = inference(
+        input,
+        classifier_account_model,
+        classifier_account_processor,
+        threshold=0.999,
+        task_prompt="<s_classifier_acct>",
+        get_confidence=True,
+    )
+    return parsed.get("class"), confident
+"""## Text processing/string matcher code"""
+import locale
+# Make the reported preferred encoding always "UTF-8". The replacement keeps
+# the optional ``do_setlocale`` parameter of the real function so callers
+# that pass it, e.g. ``locale.getpreferredencoding(False)``, do not raise
+# TypeError. The override is applied once; a second identical copy was removed.
+locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"
+"""## Classify Document Images"""
+import numpy as np
+import csv
+import re
+import os
+import requests
+def classify_acct_dtype_str(input_path):
+    """Download the document at ``input_path`` and return its predicted document type.
+
+    Args:
+        input_path: URL string passed to ``requests.get``.
+
+    Returns:
+        The first element returned by ``doctype_classify``; the confidence
+        flag it also returns is discarded.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+    """
+    # Fetch the URL the caller asked for (previously a fixed sample URL was
+    # always used) and bound the wait so a stalled server cannot hang the call.
+    response = requests.get(input_path, timeout=30)
+    response.raise_for_status()
+    # NOTE(review): the Response object itself is passed on, as before, and
+    # `inference` calls np.array() on its input. The body probably needs to be
+    # decoded into an image first — confirm and fix with the image library
+    # this project uses.
+    filename = input_path.rsplit("/", 1)[-1]
+    dtype_inf, _ = doctype_classify(response, filename)
+    return dtype_inf
+# NOTE(review): top-level call — it runs every time this module is imported,
+# performing the download inside classify_acct_dtype_str; the return value is
+# discarded. Confirm whether this smoke test should stay at import time.
+classify_acct_dtype_str("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg")