"""Streamlit front end for the insurance-document data-extraction demo.

The page lets a user upload a PDF, passes it to
``process_insurance_document`` and shows the extracted entities, the
identified personal entities and the masked text.

Run with::

    streamlit run ui_app.py
"""
import os
import tempfile
import time
import uuid

import streamlit as st
import nltk

from helpers.entity_extraction_helpers import process_insurance_document
from services.pii_service import PIIService
from services.openai_service import OpenAIService
from services.mongo_service import MongoService
from services.ocr_service import OCRService

# NLTK resources requested during session initialisation.
NLTK_PACKAGES = (
    'maxent_ne_chunker',
    'words',
    'treebank',
    'maxent_treebank_pos_tagger',
    'punkt',
    'averaged_perceptron_tagger',
)


def init_session():
    """Create the service clients for this Streamlit session, once.

    Streamlit re-executes the script on every interaction, so the clients
    are stored in ``st.session_state`` under ``'pii_instance'``,
    ``'openai_instance'`` and ``'ocr_instance'``.  The key ``'a'`` marks a
    session as initialised; it is written last so that an initialisation
    that raises part-way through is attempted again on the next rerun.
    """
    print("------------------ Initializing")
    if 'a' not in st.session_state:
        st.session_state['pii_instance'] = PIIService()
        print("PII service initialized")
        # NOTE(review): the pauses between service start-ups are kept from
        # the original code; their purpose is not documented here.
        time.sleep(2)
        st.session_state['openai_instance'] = OpenAIService(
            st.secrets["OPENAI_KEY"],
            st.secrets["OPENAI_AZURE_ENDPOINT"],
            st.secrets["OPENAI_API_VERSION"],
            st.secrets["DEPLOYMENT_NAME"],
        )
        print("OpenAI service initialized")
        time.sleep(2)
        st.session_state['ocr_instance'] = OCRService(st.secrets["OCR_API_KEY"])
        print("OCR service initialized")
        # Request the NLTK resources together with the services so the
        # downloader is not invoked again on every script rerun.
        for package in NLTK_PACKAGES:
            nltk.download(package)
        st.session_state.a = 1
    print("-----------------------------")


def _render_results(process_out):
    """Show the three result sections of a processed document."""
    st.header('Extracted entities !! ', divider='rainbow')
    st.write(process_out['entities'])
    st.header('', divider='rainbow')
    st.header('Identified personal entities ', divider='rainbow')
    st.write(process_out['masked_entities'])
    st.header('Masked Text ', divider='rainbow')
    st.write(process_out['masked_text'])


st.header('', divider='rainbow')
st.title("Data extraction")
st.header('', divider='rainbow')

init_session()

uploaded_doc = st.file_uploader("Upload an insurance document", type=["pdf"])
if uploaded_doc is not None:
    document_id = str(uuid.uuid4())
    # Save the upload in a private temporary directory instead of the
    # working directory: the file name comes from the client, so writing it
    # to the app folder could overwrite existing files or collide with a
    # concurrent session uploading a file of the same name.  The directory
    # (and the saved copy) is removed as soon as processing finishes.
    with tempfile.TemporaryDirectory() as upload_dir:
        safe_name = os.path.basename(uploaded_doc.name) or f"{document_id}.pdf"
        saved_path = os.path.join(upload_dir, safe_name)
        with open(saved_path, "wb") as f:
            f.write(uploaded_doc.getbuffer())
        print(f"File uploaded :: {uploaded_doc.name} :: {document_id}")
        # The helper is given the path of the saved copy; the empty string
        # is passed through unchanged from the original call.
        process_out = process_insurance_document(
            st.session_state['pii_instance'],
            "",
            st.session_state['openai_instance'],
            st.session_state['ocr_instance'],
            saved_path,
            document_id,
        )
    _render_results(process_out)

### TO RUN :: streamlit run ui_app.py