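# Spaces: Sleeping
# NOTE(review): the line above appears to be status text captured from the
# hosting page together with the source; it is not part of the program.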
import os
import tempfile
import time
import uuid

import nltk
import streamlit as st

from helpers.entity_extraction_helpers import process_insurance_document
from services.pii_service import PIIService
from services.openai_service import OpenAIService
from services.mongo_service import MongoService
from services.ocr_service import OCRService
def init_session():
    """Prepare the per-session services and NLTK resources used by the app.

    On the first call in a Streamlit session (detected by the absence of the
    ``'a'`` key in ``st.session_state``) this stores three service objects in
    session state:

    * ``'pii_instance'``    -- ``PIIService()``
    * ``'openai_instance'`` -- ``OpenAIService`` built from the ``OPENAI_KEY``,
      ``OPENAI_AZURE_ENDPOINT``, ``OPENAI_API_VERSION`` and
      ``DEPLOYMENT_NAME`` secrets
    * ``'ocr_instance'``    -- ``OCRService`` built from the ``OCR_API_KEY``
      secret

    and then asks NLTK to download the resources listed below.  The ``'a'``
    marker is written last, so an exception raised part-way through leaves
    the session unmarked and the whole set-up is attempted again on the next
    rerun.  Later calls in the same session only print the two banner lines.
    """
    print("------------------ Initializing")

    # 'a' is the "already initialised" marker for this session; the key name
    # is kept as-is in case other code reads it.
    if 'a' not in st.session_state:
        st.session_state['pii_instance'] = PIIService()
        print("PII service initialized")
        # NOTE(review): the purpose of the two 2-second pauses is not visible
        # in this file; they are preserved unchanged.
        time.sleep(2)

        st.session_state['openai_instance'] = OpenAIService(
            st.secrets["OPENAI_KEY"],
            st.secrets["OPENAI_AZURE_ENDPOINT"],
            st.secrets["OPENAI_API_VERSION"],
            st.secrets["DEPLOYMENT_NAME"],
        )
        print("OpenAI service initialized")
        time.sleep(2)

        st.session_state['ocr_instance'] = OCRService(st.secrets["OCR_API_KEY"])
        print("OCR service initialized")

        # Request the NLTK resources once per session rather than on every
        # script rerun.  Order matches the original sequence of calls.
        nltk_resources = (
            'maxent_ne_chunker',
            'words',
            'treebank',
            'maxent_treebank_pos_tagger',
            'punkt',
            'averaged_perceptron_tagger',
        )
        for resource in nltk_resources:
            nltk.download(resource)

        # Mark the session as initialised only after everything above ran.
        st.session_state.a = 1

    print("-----------------------------")
# ---- Page header ----
st.header('', divider='rainbow')
st.title("Data extraction")
st.header('', divider='rainbow')

# Make sure the per-session service objects exist before handling an upload.
init_session()

uploaded_doc = st.file_uploader("Upload an insurance document", type=["pdf"])
if uploaded_doc is not None:
    document_id = str(uuid.uuid4())

    # The file name comes from the client, so keep only its final path
    # component; fall back to the document id if nothing usable remains.
    safe_name = os.path.basename(uploaded_doc.name) or f"{document_id}.pdf"

    # Each upload gets its own temporary directory: two sessions uploading
    # the same file name cannot overwrite each other, and the saved copy is
    # removed as soon as the block exits (also when processing raises).
    # NOTE(review): assumes process_insurance_document has finished reading
    # the file when it returns and accepts an absolute path -- confirm.
    with tempfile.TemporaryDirectory() as upload_dir:
        local_path = os.path.join(upload_dir, safe_name)
        with open(local_path, "wb") as f:
            f.write(uploaded_doc.getbuffer())
        print(f"File uploaded :: {uploaded_doc.name} :: {document_id}")

        # The second positional argument is passed as an empty string, as in
        # the original call.
        process_out = process_insurance_document(
            st.session_state['pii_instance'],
            "",
            st.session_state['openai_instance'],
            st.session_state['ocr_instance'],
            local_path,
            document_id,
        )

    # ---- Results ----
    st.header('Extracted entities !! ', divider='rainbow')
    st.write(process_out['entities'])
    st.header('', divider='rainbow')
    st.header('Identified personal entities ', divider='rainbow')
    st.write(process_out['masked_entities'])
    st.header('Masked Text ', divider='rainbow')
    st.write(process_out['masked_text'])
# To run: streamlit run ui_app.py