eagle0504's picture
chunk size added as input arg
2ecca1e
raw
history blame
No virus
5.58 kB
import math
import os
from datetime import datetime
import openai
import PyPDF2
import streamlit as st
from openai import OpenAI
from helper.utils import *
# Page-level settings (wide layout, browser-tab title), followed by the
# on-page heading and a horizontal rule separating it from the chat area.
# The emoji were previously stored as mojibake; they are restored here.
st.set_page_config(layout="wide", page_title="Document Search using QIM🤖📖")
st.header("Document Search using Quantized Influence Measure (QIM)🤖📖")
st.write("---")
# Markdown body of the user manual shown in the sidebar expander.
# Kept at column 0 so the rendered text carries no stray leading whitespace.
INSTRUCTION_MANUAL = """
# Document Data Chatbot User Manual 🤖📖
Welcome to the Document Data Chatbot, your interactive assistant for information on the textual "Document Data". This chatbot offers quick and accurate responses to your queries. Follow these steps to interact with the chatbot:
## Getting Started 🚀
1. **Access the Chatbot**: Launch the Document Data Chatbot on your device.
2. **Start Chatting**: Type your Document Data-related questions in the chat window. Questions can range from dosage to side effects.
3. **Send Your Question**: Submit your query by clicking 'Send' or pressing 'Enter'.
## Chatting with Document Data Chatbot 🤔💬
- **Ask Anything**: Inquiries about textual composition, usage, storage, or safety are all welcome.
- **Use Simple Language**: Clear and concise questions yield the best results.
- **Wait for the Response**: The chatbot will promptly process and answer your query.
- **Follow-Up Questions**: Feel free to ask additional or new questions anytime.
## Tips for a Better Experience ✨
- **Be Specific**: Specific questions help in getting precise answers.
- **Check for Typing Errors**: Correct spelling ensures better understanding by the chatbot.
- **Emoji Use**: Emojis are welcome in your questions!
- **Patience is Key**: Responses may take a moment as the chatbot processes your query.
## Support and Feedback 🤝
- **Need Help?**: Contact our support team for any issues.
- **Share Your Feedback**: Your input is valuable and helps us improve.
## The Team Behind the App 🧑‍💻👩‍💻
- **Founders**: Learn about [Peter Yin](https://www.linkedin.com/in/peter-yin-7914ba25/) and [Yiqiao Yin](https://www.linkedin.com/in/yiqiaoyin/), the founders, on LinkedIn.
Thank you for choosing the Document Data Chatbot. We're here to provide all the information you need about Document Data efficiently. Happy chatting! 🎉💬
"""

# HTML footer template; `{year}` is filled in with the current year at render time.
COPYRIGHT_HTML = """
<h6 style='text-align: left;'>Copyright © 2010-{year} Present Yiqiao Yin</h6>
"""

# Sidebar: manual, document upload, search parameters, reset button and credit.
with st.sidebar:
    # Collapsible instruction manual.
    with st.expander("Instruction Manual 📖"):
        st.markdown(INSTRUCTION_MANUAL)

    # Accept any number of plain-text / PDF documents.
    uploaded_files = st.file_uploader(
        "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
    )

    # Tell the user how many documents are currently loaded.
    st.success(f"{len(uploaded_files or [])} document(s) loaded...")

    # Chunk size handed to the text-splitting helper; a size below 1 is
    # meaningless, so the widget refuses it.
    chunk_size_input = st.number_input(
        "Insert an integer (for size of chunks):", value=10, step=1, min_value=1
    )

    # Number of top-ranked rows to keep from the search result.
    top_n = st.number_input(
        "Insert a number (top n rows to be selected):", value=5, step=1, min_value=1
    )

    # Button that wipes the conversation history.
    clear_button = st.sidebar.button("Clear Conversation", key="clear")

    # Credit line. The helper is called inline rather than assigned to a
    # variable of the same name, which would shadow the imported function.
    st.markdown(
        COPYRIGHT_HTML.format(year=current_year()),
        unsafe_allow_html=True,
    )
# Start from an empty transcript on first load and whenever the user presses
# the clear button.
if clear_button or "messages" not in st.session_state:
    st.session_state.messages = []

# Streamlit re-executes the script on every interaction, so the stored
# transcript has to be replayed each time.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        content = message["content"]
        if isinstance(content, str):
            st.markdown(content)
        else:
            # Assistant replies are stored as result tables; st.markdown would
            # show their str() dump, so render them the way they were first
            # displayed.
            st.table(content)
# With accept_multiple_files=True the uploader yields an empty list (not None)
# when nothing has been uploaded, so test truthiness rather than `is None`.
if not uploaded_files:
    # Prompt the user to upload something to search.
    st.info("Upload files to analyze")
else:
    with st.spinner("Wait for it... 🤔"):
        # Extract the text chunks and, in parallel, the source of each chunk.
        documents, sources = read_and_textify(
            uploaded_files, chunk_size=chunk_size_input
        )
        # Numeric representation of the chunks used by the search helper
        # (NOTE(review): presumably embeddings; see helper.utils).
        query_database = list_to_nums(documents)

    # The chat box lives in this branch so a question can never be asked
    # before there are documents to search.
    if prompt := st.chat_input("What is up?"):
        # Echo the user's message and record it in the transcript.
        st.chat_message("user").markdown(prompt)
        st.session_state.messages.append({"role": "user", "content": prompt})

        # Rank the chunks against the user's actual question (previously a
        # hard-coded test query was searched instead).
        with st.spinner("Wait for it... 🤔"):
            refs_tab = query_search(prompt, documents, query_database, sources)

        # Keep only the top rows; clamp at 0 because a negative count would
        # make head() drop rows from the end instead.
        row_limit = max(math.ceil(top_n), 0)
        result = refs_tab.head(row_limit)

        # Show the reference table as the assistant's reply and record it.
        with st.chat_message("assistant"):
            st.table(result)
        st.session_state.messages.append({"role": "assistant", "content": result})