import os
import shutil
from io import BytesIO

import qdrant_client
import streamlit as st

# LlamaIndex components
from llama_index.core import Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore
# =============================================================================
# Configuration and Global Initialization
# =============================================================================
# Ensure that the OpenAI API key is available
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("Please set your OPENAI_API_KEY environment variable.")
# System prompt for the chat engine
SYSTEM_PROMPT = (
    "You are an AI assistant that answers user questions. "
    "Use the schema fields to generate appropriate and valid JSON queries."
)
# Configure the LLM and embedding models
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.4)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
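# Settings is LlamaIndex's global configuration: the LLM and embedding model
# set here are picked up by every index and chat engine created below.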
# Load initial documents from a directory called "new_file"
if os.path.exists("new_file"):
    documents = SimpleDirectoryReader("new_file").load_data()
else:
    documents = []
# Set up the Qdrant vector store (using an in-memory collection for simplicity)
client = qdrant_client.QdrantClient(location=":memory:")
vector_store = QdrantVectorStore(
    collection_name="paper",
    client=client,
    enable_hybrid=True,  # hybrid (dense + sparse) retrieval; requires the fastembed package
    batch_size=20,
)
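# Note: with location=":memory:" the collection lives only for this process.
# For persistence across restarts, QdrantClient can instead be pointed at a
# local path or server URL (e.g. path="./qdrant_data", a hypothetical path).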
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the initial index and chat engine
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
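# ChatMemoryBuffer keeps the most recent conversation turns within the token
# limit; older messages fall out of context as the chat grows.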
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=chat_memory,
    system_prompt=SYSTEM_PROMPT,
)
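# In "context" chat mode, each user message first retrieves relevant nodes
# from the vector index and places them in the model's context window, with
# SYSTEM_PROMPT steering the final answer.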
# =============================================================================
# Helper Functions
# =============================================================================
def process_uploaded_file(uploaded_file: BytesIO) -> str:
    """
    Process the uploaded file:
    1. Save the file to an "uploads" folder.
    2. Copy it to a temporary folder ("temp_upload") for reading.
    3. Update the global documents list and rebuild the index and chat engine.
    """
    global documents, index, chat_engine

    if uploaded_file is None:
        return "No file uploaded."

    # Ensure the uploads directory exists
    uploads_dir = "uploads"
    os.makedirs(uploads_dir, exist_ok=True)

    # Save the uploaded file locally
    file_name = uploaded_file.name
    dest_path = os.path.join(uploads_dir, file_name)
    with open(dest_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Prepare a temporary directory for processing the file
    temp_dir = "temp_upload"
    os.makedirs(temp_dir, exist_ok=True)

    # Clear any existing files in the temp_upload directory
    for f_name in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, f_name))
    shutil.copy(dest_path, temp_dir)

    # Load the new document(s) from the temporary folder
    new_docs = SimpleDirectoryReader(temp_dir).load_data()

    # Update the global documents list and rebuild the index and chat engine
    documents.extend(new_docs)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=chat_memory,
        system_prompt=SYSTEM_PROMPT,
    )
    return f"File '{file_name}' processed and added to the index."
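
# Note: rebuilding the entire index on every upload re-embeds all documents,
# which gets slow as the corpus grows. A lighter-weight sketch (same
# LlamaIndex API, assuming the existing index and collection should be kept)
# would insert the new documents incrementally instead:
#
#     for doc in new_docs:
#         index.insert(doc)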
def chat_with_ai(user_input: str) -> str:
    """
    Send user input to the chat engine and return the response.
    """
    response = chat_engine.chat(user_input)

    # Collect unique source file names as references (if any)
    references = response.source_nodes
    ref = []
    for node in references:
        if "file_name" in node.metadata and node.metadata["file_name"] not in ref:
            ref.append(node.metadata["file_name"])

    complete_response = str(response)
    if ref:
        complete_response += "\n\nReferences: " + ", ".join(ref)
    return complete_response
# =============================================================================
# Streamlit App Layout
# =============================================================================
st.set_page_config(page_title="LlamaIndex Chat & File Upload", layout="wide")
st.title("Chat Interface for LlamaIndex with File Upload")
# Use Streamlit tabs for separate Chat and Upload functionalities
tab1, tab2 = st.tabs(["Chat", "Upload"])
# -----------------------------------------------------------------------------
# Chat Tab
# -----------------------------------------------------------------------------
with tab1:
    st.header("Chat with the AI")

    # Initialize chat history in session state if it does not exist
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = []

    # Display conversation history
    for chat in st.session_state["chat_history"]:
        st.markdown(f"**User:** {chat[0]}")
        st.markdown(f"**AI:** {chat[1]}")
        st.markdown("---")

    # Input text for the user query
    user_input = st.text_input("Enter your question:")

    # When the "Send" button is clicked, process the chat
    if st.button("Send") and user_input:
        with st.spinner("Processing..."):
            response = chat_with_ai(user_input)
        st.session_state["chat_history"].append((user_input, response))
        st.rerun()  # refresh to show updated history (st.experimental_rerun is deprecated)

    # Button to clear the conversation history
    if st.button("Clear History"):
        st.session_state["chat_history"] = []
        st.rerun()
# -----------------------------------------------------------------------------
# Upload Tab
# -----------------------------------------------------------------------------
with tab2:
    st.header("Upload a File")
    uploaded_file = st.file_uploader(
        "Choose a file to upload",
        type=["txt", "pdf", "doc", "docx", "csv", "xlsx"],
    )
    if st.button("Upload and Process"):
        if uploaded_file is not None:
            with st.spinner("Uploading and processing file..."):
                status = process_uploaded_file(uploaded_file)
            st.success(status)
        else:
            st.error("No file uploaded.")