L1SupportBot / app.py
MillyZen's picture
Upload 3 files
a4071bd verified
Raw
History Blame Contribute Delete
3.86 kB
# app.py — L1Bot (Gradio) for Hugging Face Spaces
# Auto-index on upload (TXT/PDF), then ask questions immediately.
# Do NOT call demo.launch(); Spaces runs the "demo" Blocks automatically.
import os
from typing import List
import gradio as gr
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate
# Module-level holder for the FAISS vector store. It stays None until
# auto_index() builds an index from uploaded files; make_chain() reads it to
# create the retriever. Being a module global, it is shared by every caller
# in this process and is not persisted anywhere.
VS_HOLDER = None # in-session FAISS store
def set_api_key(key: str):
    """Store the user-supplied OpenAI API key in the process environment.

    The key is stripped of surrounding whitespace first. Empty, whitespace-only
    or ``None`` input is ignored so that an accidental click on "Set Key" with
    a blank textbox cannot overwrite a previously stored, valid key with an
    empty string.

    Args:
        key: Raw text from the API-key textbox.

    Returns:
        None. The only effect is setting ``os.environ["OPENAI_API_KEY"]``.
    """
    cleaned = (key or "").strip()
    if cleaned:
        os.environ["OPENAI_API_KEY"] = cleaned
def _load_docs(files: List[str]):
    """Load every supported upload into a flat list of documents.

    Each entry may be a path string or an object exposing the path through a
    ``name`` attribute. The extension is matched case-insensitively: ``.txt``
    files go through ``TextLoader`` (UTF-8) and ``.pdf`` files through
    ``PyPDFLoader``. Anything else is skipped silently.

    Args:
        files: Uploaded files; ``None`` or an empty list yields ``[]``.

    Returns:
        A list with the documents produced by the loaders, in upload order.
    """
    loaded = []
    for item in files or []:
        # Fall back to the item itself when it carries no ``name`` attribute.
        path = getattr(item, "name", item)
        _, suffix = os.path.splitext(path.lower())
        if suffix == ".pdf":
            loaded.extend(PyPDFLoader(path).load())
        elif suffix == ".txt":
            loaded.extend(TextLoader(path, encoding="utf-8").load())
    return loaded
def _chunk(docs):
    """Split documents into 500-character chunks that overlap by 100.

    Args:
        docs: Documents as returned by ``_load_docs``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``docs``.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    ).split_documents(docs)
def auto_index(files):
    """Index the uploaded files into the module-level FAISS store.

    Loads and chunks the files, then either creates ``VS_HOLDER`` (first
    successful upload) or adds the new chunks to the existing store.

    Every failure mode that can be detected up front is reported as a status
    string instead of an exception, because the return value is rendered
    directly in the UI:

    * nothing uploaded, or no ``.txt``/``.pdf`` among the uploads;
    * no OpenAI API key configured (embedding would fail);
    * the files contain no extractable text, so there is nothing to embed.

    Args:
        files: Uploaded files as passed by the upload component; may be
            ``None`` or empty.

    Returns:
        A human-readable status message.
    """
    global VS_HOLDER

    if not files:
        return "Please upload .txt or .pdf files."

    # Same guard and wording as chat_fn: embedding needs the key, so fail
    # early with a hint rather than letting the OpenAI client raise.
    if not os.getenv("OPENAI_API_KEY"):
        return "Add your OpenAI API key first (top of the page)."

    docs = _load_docs(files)
    if not docs:
        return "Please upload .txt or .pdf files."

    chunks = _chunk(docs)
    if not chunks:
        # E.g. image-only PDFs: building a FAISS index from zero texts fails.
        return "No extractable text found in the uploaded file(s)."

    if VS_HOLDER is None:
        # The embeddings client is only needed when creating the store; an
        # existing store embeds new documents with the client it already has.
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        VS_HOLDER = FAISS.from_documents(chunks, embeddings)
        return f"Indexed {len(chunks)} chunks from {len(files)} file(s). You can ask questions now."

    VS_HOLDER.add_documents(chunks)
    return f"Updated index: +{len(chunks)} chunks from {len(files)} file(s)."
def make_chain():
    """Build a RetrievalQA chain over the current FAISS store.

    Returns:
        ``None`` when nothing has been indexed yet (``VS_HOLDER`` is ``None``);
        otherwise a "stuff" ``RetrievalQA`` chain that retrieves the top 3
        chunks, answers with ``gpt-4o-mini`` at temperature 0, and also
        returns the source documents.
    """
    if VS_HOLDER is None:
        return None

    system_prompt = (
        "You are a helpful assistant. Answer ONLY from the provided context. "
        "If unsure, say you don't know. End with 'Sources:' and cite filenames when possible."
    )
    # Real newlines separate the question from the context; the previous
    # escaped form put the two characters "\" and "n" into the prompt text.
    # NOTE(review): the prompt asks for filenames, but only "context" is fed
    # to the model here - confirm the stuffed context actually exposes them.
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", "Question: {question}\n\nContext:\n{context}"),
    ])

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    retriever = VS_HOLDER.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        # "context" must match the placeholder name used in the prompt above.
        chain_type_kwargs={"prompt": prompt, "document_variable_name": "context"},
        return_source_documents=True,
    )
def chat_fn(message, history):
    """Answer one chat message from the indexed documents.

    Args:
        message: The user's question.
        history: Prior conversation supplied by the chat UI; unused, each
            question is answered independently.

    Returns:
        The chain's answer text, or a short instruction when the API key is
        missing, the message is blank, or no files have been indexed yet.
    """
    if not os.getenv("OPENAI_API_KEY"):
        return "Add your OpenAI API key first (top of the page)."

    # Avoid a pointless retrieval + LLM round trip for blank input.
    if not str(message or "").strip():
        return "Please type a question."

    chain = make_chain()
    if chain is None:
        return "Upload files to auto-build the index, then ask your question."

    # invoke() replaces the deprecated direct call form chain({...}).
    out = chain.invoke({"query": message})
    return out["result"]
# ---- UI ----
# Checked once at import time: when the key is already present in the
# environment, the key-entry row below is not rendered at all.
has_key = bool(os.getenv("OPENAI_API_KEY"))

with gr.Blocks(title="L1Bot — Auto-index on upload") as demo:
    # A real newline ends the heading so the subtitle renders as its own
    # paragraph (the escaped form showed a literal "\n" inside the heading).
    gr.Markdown(
        "## 🛟 L1Bot — Ask questions on your files\n"
        "Upload **.txt/.pdf** and the index builds automatically. Then just ask."
    )

    if not has_key:
        with gr.Row():
            api = gr.Textbox(
                label="OpenAI API Key",
                type="password",
                placeholder="sk-... (set once per Space run)",
            )
            set_btn = gr.Button("Set Key")
        # set_api_key returns nothing, hence no output components.
        set_btn.click(set_api_key, inputs=[api], outputs=[])

    files = gr.File(
        label="Upload TXT/PDF (multiple allowed) — auto-indexes",
        file_count="multiple",
        file_types=[".txt", ".pdf"],
    )
    status = gr.Markdown("")
    gr.Markdown("---")

    gr.ChatInterface(
        fn=chat_fn,
        title="Ask about your documents",
        description="Answers are grounded in your uploaded files.",
    )

    # Any change to the upload component triggers indexing; the returned
    # status string is shown in the Markdown element above the chat.
    files.change(auto_index, inputs=[files], outputs=[status])