# Web-page residue from the file viewer (kept as comments so the module parses):
# adit94's picture
# Update app.py
# e3ab28a verified
# raw
# history blame
# 2.44 kB
import os
import time
import uuid
import streamlit as st
import nltk
from helpers.entity_extraction_helpers import process_insurance_document
from services.pii_service import PIIService
from services.openai_service import OpenAIService
from services.mongo_service import MongoService
from services.ocr_service import OCRService
def init_session():
    """Create the per-session service objects and fetch NLTK data once.

    Streamlit re-executes the script on every interaction, so all one-time
    work is guarded by a marker key in ``st.session_state``. On the first run
    of a session this function stores:

    * ``st.session_state['pii_instance']``    -- a ``PIIService``
    * ``st.session_state['openai_instance']`` -- an ``OpenAIService`` built
      from the ``OPENAI_KEY``, ``OPENAI_AZURE_ENDPOINT``,
      ``OPENAI_API_VERSION`` and ``DEPLOYMENT_NAME`` secrets
    * ``st.session_state['ocr_instance']``    -- an ``OCRService`` built from
      the ``OCR_API_KEY`` secret

    and then downloads the NLTK resources listed below.

    Raises:
        KeyError: if one of the required entries is missing from
            ``st.secrets`` (raised by the secrets lookup itself).
    """
    print("------------------ Initializing")
    # 'a' is the legacy marker key meaning "this session is already set up";
    # the name is kept unchanged so existing sessions stay compatible.
    if 'a' not in st.session_state:
        st.session_state['pii_instance'] = PIIService()
        print("PII service initialized")
        # NOTE(review): the purpose of the 2-second pauses is not evident
        # from this file; they are preserved as-is.
        time.sleep(2)
        st.session_state['openai_instance'] = OpenAIService(
            st.secrets["OPENAI_KEY"],
            st.secrets["OPENAI_AZURE_ENDPOINT"],
            st.secrets["OPENAI_API_VERSION"],
            st.secrets["DEPLOYMENT_NAME"],
        )
        print("OpenAI service initialized")
        time.sleep(2)
        st.session_state['ocr_instance'] = OCRService(st.secrets["OCR_API_KEY"])
        print("OCR service initialized")
        # One loop over the public nltk.download entry point instead of six
        # copy-pasted calls through two different spellings of the same API.
        for resource in (
            'maxent_ne_chunker',
            'words',
            'treebank',
            'maxent_treebank_pos_tagger',
            'punkt',
            'averaged_perceptron_tagger',
        ):
            nltk.download(resource)
        # Set the marker last, so a session is only flagged as initialized
        # after every step above has run without raising.
        st.session_state.a = 1
    print("-----------------------------")
# ---- Page layout ----------------------------------------------------------
st.header('', divider='rainbow')
st.title("Data extraction")
st.header('', divider='rainbow')

# Runs on every script execution; init_session() itself skips the setup work
# once its marker key is present in st.session_state.
init_session()

# ---- Upload, process and display ------------------------------------------
uploaded_doc = st.file_uploader("Upload an insurance document", type=["pdf"])
if uploaded_doc is not None:
    # The file name is supplied by the client, so keep only its final path
    # component: the upload must never be written outside the working
    # directory via names such as "../x.pdf". Ordinary names are unchanged.
    local_path = os.path.basename(uploaded_doc.name)
    with open(local_path, "wb") as f:
        f.write(uploaded_doc.getbuffer())
    # NOTE(review): the saved copy is never removed and two uploads with the
    # same name overwrite each other -- confirm whether the helper still
    # needs the file after it returns before adding cleanup.

    document_id = str(uuid.uuid4())
    print(f"File uploaded :: {uploaded_doc.name} :: {document_id}")

    # The second positional argument is passed as an empty string; its
    # meaning is defined by the helper -- TODO confirm against its signature.
    process_out = process_insurance_document(
        st.session_state['pii_instance'],
        "",
        st.session_state['openai_instance'],
        st.session_state['ocr_instance'],
        local_path,
        document_id,
    )

    # The helper's result is read through three keys:
    # 'entities', 'masked_entities' and 'masked_text'.
    st.header('Extracted entities !! ', divider='rainbow')
    st.write(process_out['entities'])
    st.header('', divider='rainbow')
    st.header('Identified personal entities ', divider='rainbow')
    st.write(process_out['masked_entities'])
    st.header('Masked Text ', divider='rainbow')
    st.write(process_out['masked_text'])
### TO RUN :: streamlit run app.py