Spaces:

eagle0504
/

document-search-q-series

Running

App Files Files Community

document-search-q-series / app.py

eagle0504

chunk size added as input arg

2ecca1e 3 months ago

raw

history blame

No virus

5.58 kB

	import math
	import os
	from datetime import datetime

	import openai
	import PyPDF2
	import streamlit as st
	from openai import OpenAI

	from helper.utils import *

	# Page-level configuration: browser tab title and wide layout.
	st.set_page_config(
	    page_title="Document Search using QIM🤖📖",
	    layout="wide",
	)

	# Main-pane heading, followed by a markdown horizontal rule.
	st.header("Document Search using Quantized Influence Measure (QIM)🤖📖")
	st.write("---")


	# Sidebar: instruction manual, document upload, search settings, the
	# "Clear Conversation" button and the copyright footer.
	# NOTE(review): the nesting below is reconstructed; confirm that every widget
	# here is meant to live inside the sidebar.
	with st.sidebar:
	    # Collapsible user manual rendered as markdown.
	    with st.expander("Instruction Manual 📖"):
	        st.markdown(
	            """
	# Document Data Chatbot User Manual 🤖📖

	Welcome to the Document Data Chatbot, your interactive assistant for information on the textual "Document Data". This chatbot offers quick and accurate responses to your queries. Follow these steps to interact with the chatbot:

	## Getting Started 🚀
	1. Access the Chatbot: Launch the Document Data Chatbot on your device.
	2. Start Chatting: Type your Document Data-related questions in the chat window. Questions can range from dosage to side effects.
	3. Send Your Question: Submit your query by clicking 'Send' or pressing 'Enter'.

	## Chatting with Document Data Chatbot 🤔💬
	- Ask Anything: Inquiries about textual composition, usage, storage, or safety are all welcome.
	- Use Simple Language: Clear and concise questions yield the best results.
	- Wait for the Response: The chatbot will promptly process and answer your query.
	- Follow-Up Questions: Feel free to ask additional or new questions anytime.

	## Tips for a Better Experience ✨
	- Be Specific: Specific questions help in getting precise answers.
	- Check for Typing Errors: Correct spelling ensures better understanding by the chatbot.
	- Emoji Use: Emojis are welcome in your questions!
	- Patience is Key: Responses may take a moment as the chatbot processes your query.

	## Support and Feedback 🤝
	- Need Help?: Contact our support team for any issues.
	- Share Your Feedback: Your input is valuable and helps us improve.

	## The Team Behind the App 🧑‍💻👩‍💻
	- Founders: Learn about [Peter Yin](https://www.linkedin.com/in/peter-yin-7914ba25/) and [Yiqiao Yin](https://www.linkedin.com/in/yiqiaoyin/), the founders, on LinkedIn.

	Thank you for choosing the Document Data Chatbot. We're here to provide all the information you need about Document Data efficiently. Happy chatting! 🎉💬
	"""
	        )

	    # Text/PDF upload widget. With accept_multiple_files=True the widget
	    # returns a list (empty until something is uploaded), so len() is safe.
	    uploaded_files = st.file_uploader(
	        "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
	    )

	    # Tell the user how many documents are currently loaded.
	    st.success(f"{len(uploaded_files)} document(s) loaded...")

	    # Chunk size handed to read_and_textify(); min_value=1 keeps zero and
	    # negative sizes out of the chunking step.
	    chunk_size_input = st.number_input(
	        "Insert an integer (for size of chunks):", min_value=1, value=10, step=1
	    )

	    # Number of top rows kept from the search result; min_value=1 avoids
	    # the "all but the last n rows" meaning of a negative head() argument.
	    top_n = st.number_input(
	        "Insert a number (top n rows to be selected):", min_value=1, value=5, step=1
	    )

	    # Button that resets the stored conversation when pressed.
	    clear_button = st.sidebar.button("Clear Conversation", key="clear")

	    # Copyright footer. The year is stored under its own name so that the
	    # current_year() helper (star-imported from helper.utils) is not
	    # overwritten by its own return value.
	    # NOTE(review): presumably current_year() returns the current year; confirm in helper.utils.
	    copyright_year = current_year()
	    st.markdown(
	        f"""
	<h6 style='text-align: left;'>Copyright © 2010-{copyright_year} Present Yiqiao Yin</h6>
	""",
	        unsafe_allow_html=True,
	    )


	# Start from an empty chat history on the first run of a session, and wipe
	# it again whenever the "Clear Conversation" button was pressed on this run.
	if clear_button or "messages" not in st.session_state:
	    st.session_state.messages = []


	# Replay the stored conversation on every app rerun. User turns are stored
	# as plain strings, while assistant turns store the result table object that
	# was originally shown with st.table. Non-string content is therefore drawn
	# as a table again instead of being passed to st.markdown.
	for message in st.session_state.messages:
	    with st.chat_message(message["role"]):
	        content = message["content"]
	        if isinstance(content, str):
	            st.markdown(content)
	        else:
	            st.table(content)


	# Main pane: document search driven by the chat input.
	#
	# The uploader is created with accept_multiple_files=True, so "nothing
	# uploaded" arrives as an empty list rather than None; a truthiness test
	# covers both and lets the upload hint actually appear.
	if not uploaded_files:
	    # Prompt the user to upload files before anything can be searched.
	    st.info("Upload files to analyze")
	else:
	    with st.spinner("Wait for it... 🤔"):
	        # Turn the uploads into text chunks plus a matching source entry.
	        # NOTE(review): return shape is taken from the tuple unpacking; confirm in helper.utils.
	        documents, sources = read_and_textify(
	            uploaded_files, chunk_size=chunk_size_input
	        )

	        # Searchable representation of the chunks, consumed by query_search().
	        query_database = list_to_nums(documents)

	    # React to user input
	    if prompt := st.chat_input("What is up?"):
	        # Display user message in chat message container
	        st.chat_message("user").markdown(prompt)
	        # Add user message to chat history
	        st.session_state.messages.append({"role": "user", "content": prompt})

	        with st.spinner("Wait for it... 🤔"):
	            # Search with the user's actual question, not a fixed string.
	            # NOTE(review): assumes the first positional argument of
	            # query_search() is the query text; confirm in helper.utils.
	            refs_tab = query_search(prompt, documents, query_database, sources)
	            # Keep only the top-n rows requested in the sidebar.
	            result = refs_tab.head(math.ceil(top_n))

	        # Display assistant response in chat message container
	        with st.chat_message("assistant"):
	            st.table(result)
	        # Add assistant response to chat history
	        st.session_state.messages.append({"role": "assistant", "content": result})