|
import os |
|
os.environ['KMP_DUPLICATE_LIB_OK']='True' |
|
import streamlit as st |
|
|
|
|
|
from streamlit.runtime.scriptrunner.script_run_context import get_script_run_ctx |
|
from datetime import datetime |
|
import pandas as pd |
|
from PyPDF2 import PdfReader |
|
from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data
def get_text_chunks(pdf_docs, chunk_size=2000, overlap=0):
    """Extract the text of the given PDFs and split it into chunks.

    The text of every page of every PDF is concatenated (in order, without a
    separator) and then split with ``RecursiveCharacterTextSplitter``.
    The result is memoized by ``st.cache_data``.

    Args:
        pdf_docs: Iterable of PDF sources accepted by ``PdfReader``.
        chunk_size: Passed to the splitter as ``chunk_size``.
        overlap: Passed to the splitter as ``chunk_overlap``.

    Returns:
        The value returned by ``text_splitter.split_text`` for the
        concatenated text.
    """
    page_texts = []
    for pdf in pdf_docs:
        for page in PdfReader(pdf).pages:
            # Defensive: treat a missing extraction result as empty text so
            # one unreadable page cannot abort the whole upload.
            page_texts.append(page.extract_text() or "")

    # Join once instead of growing a string with += on every page.
    documents = "".join(page_texts)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=overlap,
    )
    return text_splitter.split_text(documents)
|
|
|
def build_experimental_ui():
    """Render the experimental Streamlit page.

    Sidebar: task selector, model selector (plus an embeddings selector for
    the two question-answering tasks), a PDF uploader and a "Process" button
    that splits the uploaded PDFs into text chunks.

    Main area: a data-handling disclaimer and the task heading. Only the
    "Question & Answer" task has a body (prompt input, questions CSV upload
    and a "Submit" button); every other task shows "Under Development".

    Session state keys written:
        pdf_file: current value of the PDF uploader, refreshed on every run.
        processed_pdf_file: the upload that was last chunked via "Process".
        text_chunks: chunks returned by ``get_text_chunks`` for that upload.

    Returns:
        None. All output is rendered through Streamlit.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost - confirm which widgets belong in the sidebar.
    qa_tasks = ("Question & Answer", "Question & Answer (Chat Mode)")

    with st.sidebar:
        tabs = st.selectbox('SELECT TASK', [
            "Question & Answer",
            "Question & Answer (Chat Mode)",
            "Transcript Intelligence",
        ])
        st.markdown('---')

        # NOTE(review): both option lists are empty placeholders and the
        # selections are not consumed anywhere in this function yet.
        selected_model = st.selectbox("Select Model:", options=[], index=0)
        if tabs in qa_tasks:
            selected_embeddings = st.selectbox(
                "Select Embeddings:", options=[], index=0
            )

        pdf_docs = st.file_uploader(
            'Upload a PDF file', type=['pdf'], accept_multiple_files=True
        )
        process_clicked = st.button("Process", disabled=not pdf_docs)

        # Compare against the last *processed* upload, kept under its own key.
        # Comparing against 'pdf_file' cannot work: that key is refreshed with
        # the current upload on every run, so the two are always equal.
        already_processed = pdf_docs == st.session_state.get('processed_pdf_file')
        if process_clicked and pdf_docs and not already_processed:
            with st.spinner('Creating embeddings...'):
                texts = get_text_chunks(pdf_docs=pdf_docs)
            st.session_state['processed_pdf_file'] = pdf_docs
            # Button state lasts a single rerun, so keep the chunks around.
            st.session_state['text_chunks'] = texts

        st.session_state['pdf_file'] = pdf_docs

    st.error("Disclaimer: All data processed in this application will be sent to OpenAI API based in the United States.")

    st.markdown('## ' + tabs)

    if tabs != 'Question & Answer':
        st.info("Under Development")
        return

    st.markdown('---')

    prompt = st.text_input('Input your prompt', disabled=False, key="text")

    questions_file = st.file_uploader(
        'Upload a CSV file with questions',
        type=['csv'],
        accept_multiple_files=False,
    )

    questions_df = None
    if questions_file:
        try:
            questions_df = pd.read_csv(questions_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            st.error(f"Could not read the questions CSV: {exc}")

    if not st.button('Submit', disabled=False):
        return

    # Submit can be pressed without a usable CSV; report it instead of
    # failing on an unbound variable or a missing column.
    if questions_df is None:
        st.warning("A readable CSV file with questions is required before submitting.")
        return
    if 'question' not in questions_df.columns:
        st.error("The uploaded CSV file must contain a 'question' column.")
        return

    # NOTE(review): the instructions are built but not sent anywhere in the
    # visible code - the downstream call appears to be missing.
    instructions = [
        f'{prompt}.Question:{question}'
        for question in questions_df['question']
    ]
|
|
|
# Script entry point: Streamlit re-executes this file top to bottom on every
# interaction, so the page is rebuilt by calling the UI builder here.
build_experimental_ui()