import os
import pdfplumber
import gradio as gr
from transformers import pipeline
import torch
import spaces
# Quick CUDA sanity check; guarded so the app also starts on CPU-only machines
if torch.cuda.is_available():
    zero = torch.Tensor([0]).cuda()
    print(zero.device)

device = 0 if torch.cuda.is_available() else -1

# Load zero-shot classification model
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=device)
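# Note: the zero-shot pipeline returns a dict whose "labels" list is sorted by
# descending "scores"; classify_text below relies on that ordering.
# Illustrative shape only (text and numbers here are hypothetical):
#   classifier("Please process the ongoing fee", request_types)
#   -> {"sequence": "...", "labels": ["Fee Payment", ...], "scores": [0.82, ...]}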
# Define request types and subtypes
request_types = [
    "Adjustment",
    "AU Transfer",
    "Closing Notice",
    "Commitment Change",
    "Fee Payment",
    "Money Movement - Inbound",
    "Money Movement - Outbound",
]

sub_request_types = {
    "Closing Notice": ["Reallocation Fees", "Amendment Fees", "Reallocation Principal"],
    "Commitment Change": ["Cashless Roll", "Decrease", "Increase"],
    "Fee Payment": ["Ongoing Fee", "Letter of Credit Fee"],
    "Money Movement - Inbound": ["Principal", "Interest", "Principal + Interest", "Principal + Interest + Fee"],
    "Money Movement - Outbound": ["Timebound", "Foreign Currency"],
}
# Function to extract text from PDFs
def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page once and skip pages with no extractable text
        text = "\n".join(
            page_text for page_text in (page.extract_text() for page in pdf.pages) if page_text
        )
    return text
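# Example (hypothetical file): extract_text_from_pdf("loan_request.pdf") returns
# the text of every page that has extractable text, joined with newlines.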
# Function to classify emails
def classify_text(text):
    # bart-large-mnli has a limited input length, so very long PDFs may be
    # classified on their leading text only
    result = classifier(text, request_types)
    main_category = result["labels"][0]
    confidence = result["scores"][0]
    if main_category in sub_request_types:
        sub_type_result = classifier(text, sub_request_types[main_category])
        sub_category = sub_type_result["labels"][0] if sub_type_result["labels"] else "Unknown"
    else:
        sub_category = "Unknown"
    return f"Request Type: {main_category}\nSub Request Type: {sub_category}\nConfidence Score: {confidence:.2f}"
# Gradio UI
def process_pdf(file):
    # gr.File(type="filepath") passes the path as a string, not a file object
    text = extract_text_from_pdf(file)
    return classify_text(text)
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(type="filepath", file_types=[".pdf"]),  # 'filepath' hands process_pdf a path string
    outputs="text",
    title="Email Request Type Classification",
    description="Upload a PDF file containing loan servicing requests, and the model will classify its request type.",
)
if __name__ == "__main__":
    iface.launch()
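# Run locally with `python app.py`; Gradio serves the UI at http://127.0.0.1:7860 by default.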