Spaces:

Alexvatti
/

LLM-Email-Category

Sleeping

File size: 2,292 Bytes

89ceeee
 
 
 
 
c30937f
89ceeee
 
c30937f
 
 
 
ca43ba3
89ceeee
c30937f
89ceeee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca43ba3
89ceeee
 
 
 
 
 
c30937f
89ceeee
 
 
 
 
 
 
 
 
 
 
 
 
 
ca43ba3
89ceeee

import os
import pdfplumber
import gradio as gr
from transformers import pipeline
import torch
import spaces


# Resolve the inference device first: GPU index 0 when CUDA is reported as
# available, otherwise -1, which the transformers pipeline treats as CPU.
device = 0 if torch.cuda.is_available() else -1

# Startup probe that prints where a freshly created tensor lives. The move to
# CUDA is only attempted when CUDA is available, so a machine without a GPU
# reaches the CPU fallback above instead of failing at import time.
zero = torch.Tensor([0])
if device == 0:
    zero = zero.cuda()
print(zero.device)

# Load LLM model for classification
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device,
)

# Candidate labels for the first classification pass (top-level request type).
request_types = [
    "Adjustment",
    "AU Transfer",
    "Closing Notice",
    "Commitment Change",
    "Fee Payment",
    "Money Movement - Inbound",
    "Money Movement - Outbound",
]

# Candidate labels for the second pass, keyed by the top-level request type.
# "Adjustment" and "AU Transfer" have no entry here, i.e. no sub-types.
sub_request_types = {
    "Closing Notice": [
        "Reallocation Fees",
        "Amendment Fees",
        "Reallocation Principal",
    ],
    "Commitment Change": [
        "Cashless Roll",
        "Decrease",
        "Increase",
    ],
    "Fee Payment": [
        "Ongoing Fee",
        "Letter of Credit Fee",
    ],
    "Money Movement - Inbound": [
        "Principal",
        "Interest",
        "Principal + Interest",
        "Principal + Interest + Fee",
    ],
    "Money Movement - Outbound": [
        "Timebound",
        "Foreign Currency",
    ],
}

# Function to extract text from PDFs

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_path: Location of the PDF, in any form ``pdfplumber.open`` accepts.

    Returns:
        The extracted page texts in page order, separated by ``"\\n"``. Pages
        for which extraction yields nothing are skipped, so a PDF without any
        extractable text produces an empty string.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Extract once per page and reuse the result for both the
            # emptiness check and the output; extraction is the costly step.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    return "\n".join(page_texts)

# Function to classify emails
@spaces.GPU
def classify_text(text):
    """Classify text into a request type and, where defined, a sub request type.

    The module-level zero-shot ``classifier`` is first run against
    ``request_types`` and its first returned label is taken as the main
    category. If that category has entries in ``sub_request_types``, a second
    pass is run against those sub-types.

    Args:
        text: The text to classify.

    Returns:
        A three-line string holding the request type, the sub request type
        ("Unknown" when the main category has no sub-types) and the score of
        the main category formatted to two decimals. Empty or whitespace-only
        text is not sent to the model; it yields "Unknown" for both types and
        a score of 0.00.
    """
    main_category = "Unknown"
    sub_category = "Unknown"
    confidence = 0.0

    # An empty sequence cannot be classified (the zero-shot pipeline rejects
    # it), which happens e.g. for PDFs with no extractable text. Skip the
    # model in that case and report "Unknown" in the usual output format.
    if text and text.strip():
        result = classifier(text, request_types)
        main_category = result["labels"][0]
        confidence = result["scores"][0]

        # Only categories listed in sub_request_types get a second pass.
        candidate_sub_types = sub_request_types.get(main_category)
        if candidate_sub_types:
            sub_type_result = classifier(text, candidate_sub_types)
            if sub_type_result["labels"]:
                sub_category = sub_type_result["labels"][0]

    return f"Request Type: {main_category}\nSub Request Type: {sub_category}\nConfidence Score: {confidence:.2f}"

# Gradio UI

def process_pdf(file):
    """Gradio callback: extract the text of an uploaded PDF and classify it.

    Args:
        file: The upload as delivered by the file input. Either a path or an
            object exposing the path as ``.name``; ``None`` when nothing was
            uploaded.

    Returns:
        The formatted classification produced by ``classify_text``, or a short
        prompt to upload a file when ``file`` is ``None``.
    """
    if file is None:
        # Nothing to read: answer with a message instead of failing on
        # attribute access below.
        return "No file uploaded. Please upload a PDF file."

    # Accept both plain paths and wrappers that carry the path in `.name`.
    pdf_path = getattr(file, "name", file)
    text = extract_text_from_pdf(pdf_path)
    return classify_text(text)

# Upload widget limited to PDF files; type="filepath" (rather than 'file')
# delivers the upload to the callback as a path.
_pdf_upload = gr.File(type="filepath", file_types=[".pdf"])

# Single-input, single-output web UI: uploaded PDF in, classification text out.
iface = gr.Interface(
    title="Email Request Type Classification",
    description="Upload a PDF file containing loan servicing requests, and the model will classify its request type.",
    fn=process_pdf,
    inputs=_pdf_upload,
    outputs="text",
)

# Start the web server only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()