# Spaces: eagle0504 / document-search-q-series (Running)
# File size: 5,656 Bytes

import math
import os
from datetime import datetime

import openai
import PyPDF2
import streamlit as st
from openai import OpenAI

from helper.utils import *

# Page chrome: wide layout and browser-tab title, then the on-page heading
# followed by a markdown horizontal rule.
st.set_page_config(
    page_title="Document Search using QIM🤖📖",
    layout="wide",
)
st.header("Document Search using Quantized Influence Measure (QIM)🤖📖")
st.write("---")


# Sidebar: instruction manual, document upload, search options and action
# buttons. The names bound here (uploaded_files, top_n, use_dict_format,
# submit_it, clear_button) are read by the page logic further down the script.
with st.sidebar:
    # Collapsible instruction manual rendered as markdown
    with st.expander("Instruction Manual 📖"):
        st.markdown(
            """
            # Document Data Chatbot User Manual 🤖📖
            
            Welcome to the Document Data Chatbot, your interactive assistant for information on the textual "Document Data". This chatbot offers quick and accurate responses to your queries. Follow these steps to interact with the chatbot:

            ## Getting Started 🚀
            1. **Access the Chatbot**: Launch the Document Data Chatbot on your device.
            2. **Start Chatting**: Type your Document Data-related questions in the chat window. Questions can range from dosage to side effects.
            3. **Send Your Question**: Submit your query by clicking 'Send' or pressing 'Enter'.

            ## Chatting with Document Data Chatbot 🤔💬
            - **Ask Anything**: Inquiries about textual composition, usage, storage, or safety are all welcome.
            - **Use Simple Language**: Clear and concise questions yield the best results.
            - **Wait for the Response**: The chatbot will promptly process and answer your query.
            - **Follow-Up Questions**: Feel free to ask additional or new questions anytime.

            ## Tips for a Better Experience ✨
            - **Be Specific**: Specific questions help in getting precise answers.
            - **Check for Typing Errors**: Correct spelling ensures better understanding by the chatbot.
            - **Emoji Use**: Emojis are welcome in your questions!
            - **Patience is Key**: Responses may take a moment as the chatbot processes your query.

            ## Support and Feedback 🤝
            - **Need Help?**: Contact our support team for any issues.
            - **Share Your Feedback**: Your input is valuable and helps us improve.

            ## The Team Behind the App 🧑‍💻👩‍💻
            - **Founders**: Learn about [Peter Yin](https://www.linkedin.com/in/peter-yin-7914ba25/) and [Yiqiao Yin](https://www.linkedin.com/in/yiqiaoyin/), the founders, on LinkedIn.

            Thank you for choosing the Document Data Chatbot. We're here to provide all the information you need about Document Data efficiently. Happy chatting! 🎉💬
            """
        )

    # Upload widget restricted to .txt / .pdf; multiple files are allowed, so
    # the widget yields a sequence of uploaded files.
    uploaded_files = st.file_uploader(
        "Upload documents", accept_multiple_files=True, type=["txt", "pdf"]
    )

    # Inform the user how many documents have been loaded
    st.success(f"{len(uploaded_files)} document(s) loaded...")

    # Number of top-ranked rows to keep. min_value=1 stops the user from
    # entering 0 or a negative count, which would empty or invert the row
    # selection when it is later applied with DataFrame.head().
    top_n = st.number_input(
        "Insert a number (top n rows to be selected):",
        min_value=1,
        value=5,
        step=1,
    )

    # Output format toggle: JSON/dictionary text instead of a table
    use_dict_format = st.checkbox("Use dictionary format.")

    # Action buttons. We are already inside `with st.sidebar`, so plain
    # st.button places them in the sidebar.
    submit_it = st.button("Submit", type="primary")
    clear_button = st.button("Clear Conversation", key="clear")

    # Credit line. The year is bound to its own name so that the
    # current_year() helper (not defined in this file; presumably supplied by
    # the star import from helper.utils) is not shadowed by its result.
    copyright_year = current_year()
    st.markdown(
        f"""
            <h6 style='text-align: left;'>Copyright © 2010-{copyright_year} Present Yiqiao Yin</h6>
        """,
        unsafe_allow_html=True,
    )


# Chat history lives in session state so it survives Streamlit reruns.
# Start with an empty history on the first run of a session, and reset it
# whenever the "Clear Conversation" button was pressed.
if clear_button or "messages" not in st.session_state:
    st.session_state.messages = []


# Replay the stored conversation on every rerun.
for message in st.session_state.messages:
    content = message["content"]
    with st.chat_message(message["role"]):
        if isinstance(content, str):
            st.markdown(content)
        else:
            # Assistant replies may be stored as the result table object
            # rather than text; st.markdown would only show its str() form,
            # so render such content as a table instead.
            st.table(content)


# Main page logic: build a search index from the uploaded documents when
# "Submit" is pressed, then answer chat queries against that index.
if not uploaded_files:
    # The uploader yields an empty sequence when nothing is uploaded (the
    # sidebar already calls len() on it), so test truthiness, not `is None`.
    st.info("Upload files to analyze")

    # Forget any index built from files that are no longer uploaded.
    if "search_index" in st.session_state:
        del st.session_state["search_index"]

else:
    # Cheap fingerprint of the current uploads, used to detect that the file
    # selection changed after the index was built.
    upload_signature = [(f.name, f.size) for f in uploaded_files]

    if submit_it:
        # Extract the text of each document and its source label, then build
        # the numeric representation used for searching.
        documents, sources = read_and_textify(uploaded_files)

        # A button is only True on the rerun triggered by its click, so the
        # processed data is kept in session state; otherwise the chat input
        # would vanish on the very rerun that delivers the user's prompt.
        st.session_state["search_index"] = {
            "signature": upload_signature,
            "documents": documents,
            "sources": sources,
            "query_database": list_to_nums(documents),
        }

    search_index = st.session_state.get("search_index")
    if search_index is not None and search_index["signature"] != upload_signature:
        # Uploads changed since the last Submit: require a fresh Submit
        # instead of answering from a stale index.
        search_index = None

    if search_index is not None:
        # React to user input
        if prompt := st.chat_input("What is up?"):
            # Show the user's message and record it in the chat history
            st.chat_message("user").markdown(prompt)
            st.session_state.messages.append({"role": "user", "content": prompt})

            # Search with the user's actual prompt (previously a fixed debug
            # string was used) and keep only the top-n rows. The .head() /
            # .to_json() calls assume query_search returns a pandas
            # DataFrame -- TODO confirm against helper.utils.
            refs_tab = query_search(
                prompt,
                search_index["documents"],
                search_index["query_database"],
                search_index["sources"],
            )
            result = refs_tab.head(math.ceil(top_n))

            # Display the assistant response in the requested format
            with st.chat_message("assistant"):
                if use_dict_format:
                    payload = result.to_json()
                    st.write(payload)
                else:
                    payload = result
                    st.table(payload)

            # Record exactly what was shown so the history matches the reply
            st.session_state.messages.append(
                {"role": "assistant", "content": payload}
            )