import os
import shutil
import streamlit as st
from io import BytesIO

# LlamaIndex and Qdrant components
import qdrant_client
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore

# =============================================================================
# Configuration and Global Initialization
# =============================================================================

# Ensure that the OpenAI API key is available
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")
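# The key can be exported in the shell before launching the app, e.g.:
#     export OPENAI_API_KEY="sk-..."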

# System prompt for the chat engine
SYSTEM_PROMPT = (
    "You are an AI assistant who answers the user's questions. "
    "Use the schema fields to generate appropriate and valid JSON queries."
)

# Configure the LLM and embedding models
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Load initial documents from a directory called "new_file"
if os.path.exists("new_file"):
    documents = SimpleDirectoryReader("new_file").load_data()
else:
    documents = []

# Set up the Qdrant vector store (using an in-memory collection for simplicity)
client = qdrant_client.QdrantClient(location=":memory:")
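# Note: an in-memory collection is lost when the process exits. For
# persistence, qdrant_client also supports on-disk storage or a remote
# server (path/URL below are illustrative):
#     client = qdrant_client.QdrantClient(path="./qdrant_data")
#     client = qdrant_client.QdrantClient(url="http://localhost:6333")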

vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,
    batch_size=20,
)
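# enable_hybrid=True combines dense vectors with a sparse (keyword-style)
# encoder at query time; in llama_index this typically requires the optional
# fastembed package to be installed.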
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the initial index and chat engine
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=chat_memory,
    system_prompt=SYSTEM_PROMPT,
)
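
# In "context" chat mode, each user message triggers retrieval over the index
# and the retrieved text is injected into the prompt; ChatMemoryBuffer trims
# the running conversation to roughly the configured token_limit.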

# =============================================================================
# Helper Functions
# =============================================================================

def process_uploaded_file(uploaded_file: BytesIO) -> str:
    """
    Process the uploaded file:
      1. Save the file to an "uploads" folder.
      2. Copy it to a temporary folder ("temp_upload") for reading.
      3. Update the global documents list and rebuild the index and chat engine.
    """
    if uploaded_file is None:
        return "No file uploaded."

    # Ensure the uploads directory exists
    uploads_dir = "uploads"
    os.makedirs(uploads_dir, exist_ok=True)

    # Save the uploaded file locally
    file_name = uploaded_file.name
    dest_path = os.path.join(uploads_dir, file_name)
    with open(dest_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Prepare a temporary directory for processing the file
    temp_dir = "temp_upload"
    os.makedirs(temp_dir, exist_ok=True)
    # Clear any existing file in temp_upload directory
    for f_name in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, f_name))
    shutil.copy(dest_path, temp_dir)

    # Load new document(s) from the temporary folder using SimpleDirectoryReader
    new_docs = SimpleDirectoryReader(temp_dir).load_data()

    # Update global documents and rebuild the index and chat engine
    global documents, index, chat_engine
    documents.extend(new_docs)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=chat_memory,
        system_prompt=SYSTEM_PROMPT,
    )

    return f"File '{file_name}' processed and added to the index."

def chat_with_ai(user_input: str) -> str:
    """
    Send user input to the chat engine and return the response.
    """
    response = chat_engine.chat(user_input)
    # Extract references from the response (if any)
    references = response.source_nodes
    ref = []
    for node in references:
        if "file_name" in node.metadata and node.metadata["file_name"] not in ref:
            ref.append(node.metadata["file_name"])
    complete_response = str(response)
    if ref:
        complete_response += "\n\nReferences: " + ", ".join(ref)
    return complete_response
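
# Illustrative usage (output shape only; actual text depends on the model and
# the indexed files):
#     chat_with_ai("What does the uploaded paper conclude?")
#     -> "The paper concludes ...\n\nReferences: paper.pdf"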

# =============================================================================
# Streamlit App Layout
# =============================================================================

st.set_page_config(page_title="LlamaIndex Chat & File Upload", layout="wide")
st.title("Chat Interface for LlamaIndex with File Upload")

# Use Streamlit tabs for separate Chat and Upload functionalities
tab1, tab2 = st.tabs(["Chat", "Upload"])

# -----------------------------------------------------------------------------
# Chat Tab
# -----------------------------------------------------------------------------
with tab1:
    st.header("Chat with the AI")
    # Initialize chat history in session state if it does not exist
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []

    # Display conversation history
    for chat in st.session_state["chat_history"]:
        st.markdown(f"**User:** {chat[0]}")
        st.markdown(f"**AI:** {chat[1]}")
        st.markdown("---")

    # Input text for user query
    user_input = st.text_input("Enter your question:")

    # When the "Send" button is clicked, process the chat
    if st.button("Send") and user_input:
        with st.spinner("Processing..."):
            response = chat_with_ai(user_input)
        st.session_state["chat_history"].append((user_input, response))
        st.rerun()  # Refresh the app to show the updated history

    # Button to clear the conversation history
    if st.button("Clear History"):
        st.session_state["chat_history"] = []
        st.rerun()

# -----------------------------------------------------------------------------
# Upload Tab
# -----------------------------------------------------------------------------
with tab2:
    st.header("Upload a File")
    uploaded_file = st.file_uploader("Choose a file to upload", type=["txt", "pdf", "doc", "docx", "csv", "xlsx"])
    if st.button("Upload and Process"):
        if uploaded_file is not None:
            with st.spinner("Uploading and processing file..."):
                status = process_uploaded_file(uploaded_file)
            st.success(status)
        else:
            st.error("No file uploaded.")
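
# To launch the app (assuming this script is saved as app.py):
#     streamlit run app.py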