File size: 2,292 Bytes
89ceeee
 
 
 
 
c30937f
89ceeee
 
c30937f
 
 
 
ca43ba3
89ceeee
c30937f
89ceeee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca43ba3
89ceeee
 
 
 
 
 
c30937f
89ceeee
 
 
 
 
 
 
 
 
 
 
 
 
 
ca43ba3
89ceeee
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os
import pdfplumber
import gradio as gr
from transformers import pipeline
import torch
import spaces


# Startup device check: create a small tensor, move it to the GPU when CUDA is
# reported as available, and print where it ended up. The move is guarded so
# that a CUDA-less machine reaches the CPU fallback below instead of failing
# on an unconditional `.cuda()` call.
zero = torch.Tensor([0])
if torch.cuda.is_available():
    zero = zero.cuda()
print(zero.device)

# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
device = 0 if torch.cuda.is_available() else -1

# Load the zero-shot classification model once at import time; it is reused
# for both the request-type pass and the sub-request-type pass.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device,
)

# Candidate labels for the first (top-level) classification pass.
request_types = [
    "Adjustment", "AU Transfer",
    "Closing Notice", "Commitment Change",
    "Fee Payment",
    "Money Movement - Inbound", "Money Movement - Outbound",
]

# Candidate labels for the second pass, keyed by the top-level request type.
# Request types that do not appear as a key here ("Adjustment" and
# "AU Transfer") have no sub-types.
sub_request_types = {
    "Closing Notice": [
        "Reallocation Fees",
        "Amendment Fees",
        "Reallocation Principal",
    ],
    "Commitment Change": [
        "Cashless Roll",
        "Decrease",
        "Increase",
    ],
    "Fee Payment": [
        "Ongoing Fee",
        "Letter of Credit Fee",
    ],
    "Money Movement - Inbound": [
        "Principal",
        "Interest",
        "Principal + Interest",
        "Principal + Interest + Fee",
    ],
    "Money Movement - Outbound": [
        "Timebound",
        "Foreign Currency",
    ],
}

# Function to extract text from PDFs

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Pages for which ``extract_text()`` returns a falsy value (``None`` or an
    empty string) are skipped, so the result is ``""`` when no page yields
    any text.

    Args:
        pdf_path: Location of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        The concatenated page texts as a single string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page exactly once; the extraction result is used both
        # for the emptiness check and as the joined value.
        page_texts = (page.extract_text() for page in pdf.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

# Function to classify emails
@spaces.GPU
def classify_text(text):
    """Classify text into a request type and, where defined, a sub-type.

    The text is first scored against ``request_types``. If the winning type
    has an entry in ``sub_request_types``, the text is scored a second time
    against that entry's labels to pick the sub-type; otherwise the sub-type
    is reported as "Unknown".

    Blank input (``None``, empty, or whitespace-only) is not sent to the
    model: there is nothing to classify, so both categories are reported as
    "Unknown" with a confidence of 0.

    Args:
        text: The document text to classify.

    Returns:
        A three-line string with the request type, the sub request type and
        the confidence score of the request type (two decimal places).
    """
    if not text or not text.strip():
        main_category, sub_category, confidence = "Unknown", "Unknown", 0.0
    else:
        result = classifier(text, request_types)
        # The top entry of the result is taken as the prediction.
        main_category = result["labels"][0]
        confidence = result["scores"][0]

        # A single lookup covers both "has sub-types?" and "which ones?".
        sub_labels = sub_request_types.get(main_category)
        sub_category = "Unknown"
        if sub_labels:
            sub_type_result = classifier(text, sub_labels)
            if sub_type_result["labels"]:
                sub_category = sub_type_result["labels"][0]

    return f"Request Type: {main_category}\nSub Request Type: {sub_category}\nConfidence Score: {confidence:.2f}"

# Gradio UI

def process_pdf(file):
    """Extract the text of an uploaded PDF and classify it.

    Args:
        file: The upload handed over by the UI. May be ``None`` when nothing
            was uploaded, a filesystem path (``str`` or ``os.PathLike``), or
            an object exposing the path through a ``name`` attribute.

    Returns:
        The classification summary produced by ``classify_text``, or a short
        prompt asking for a file when none was provided.
    """
    if file is None:
        # Submitting without an upload would otherwise fail on `None.name`.
        return "Please upload a PDF file."

    # Path-like values are used directly; a Path's `.name` is only the final
    # component, so it must not be read for those. Anything else is treated
    # as a file-like object that carries its path in `.name`.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = file
    else:
        pdf_path = file.name

    text = extract_text_from_pdf(pdf_path)
    return classify_text(text)

# Gradio interface wiring: one PDF upload is passed to process_pdf and the
# string it returns is displayed as plain text.
iface = gr.Interface(
    fn=process_pdf,
    # Upload widget restricted to .pdf files.
    inputs=gr.File(type="filepath", file_types=[".pdf"]),  # Use 'filepath' instead of 'file' as the File type
    outputs="text",
    title="Email Request Type Classification",
    description="Upload a PDF file containing loan servicing requests, and the model will classify its request type."
)

# Launch the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()