# app.py — L1Bot (Gradio) for Hugging Face Spaces
# Auto-index on upload (TXT/PDF), then ask questions immediately.
# Do NOT call demo.launch(); Spaces runs the "demo" Blocks automatically.

import os
from typing import List

import gradio as gr
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate

# Module-level FAISS store. It lives for the lifetime of the Python process.
# NOTE(review): a module global (and the OPENAI_API_KEY environment variable
# written by set_api_key) is shared by everything running in this process,
# not scoped to one browser session — confirm that is acceptable for the
# Space's audience.
VS_HOLDER = None

_MISSING_KEY_MSG = "Add your OpenAI API key first (top of the page)."


def set_api_key(key: str) -> None:
    """Store the given OpenAI API key in the process environment.

    Surrounding whitespace is stripped. Empty or whitespace-only input is
    ignored so an existing key is never overwritten with a blank value.
    """
    cleaned = (key or "").strip()
    if cleaned:
        os.environ["OPENAI_API_KEY"] = cleaned


def _load_docs(files: List[str]):
    """Load the uploaded .txt and .pdf files into LangChain documents.

    Args:
        files: Paths, or objects exposing the path as a ``name`` attribute.
            ``None`` is treated as an empty list.

    Returns:
        A list with the documents of every supported file; files with any
        other extension are skipped.
    """
    docs = []
    for f in files or []:
        path = f.name if hasattr(f, "name") else f
        # Compare the extension case-insensitively (".PDF" == ".pdf").
        ext = os.path.splitext(path)[1].lower()
        if ext == ".txt":
            docs.extend(TextLoader(path, encoding="utf-8").load())
        elif ext == ".pdf":
            docs.extend(PyPDFLoader(path).load())
    return docs


def _chunk(docs):
    """Split documents into 500-character chunks with 100 characters of overlap."""
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    return splitter.split_documents(docs)


def auto_index(files) -> str:
    """Index the uploaded files into the shared FAISS store.

    Creates the store on first use and appends to it afterwards.

    Args:
        files: Value of the ``gr.File`` component (may be ``None``).

    Returns:
        A status message for the UI.
    """
    global VS_HOLDER

    docs = _load_docs(files)
    if not docs:
        return "Please upload .txt or .pdf files."

    # Embedding needs the key; report it the same way chat_fn does instead of
    # letting the embeddings client raise inside the upload callback.
    if not os.getenv("OPENAI_API_KEY"):
        return _MISSING_KEY_MSG + " Then upload your files again."

    chunks = _chunk(docs)
    # NOTE(review): every chunk of every file passed in is added; if the File
    # component re-sends previously uploaded files, they would be indexed
    # again — verify against Gradio's change-event payload.
    if VS_HOLDER is None:
        embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
        VS_HOLDER = FAISS.from_documents(chunks, embeddings)
        return (
            f"Indexed {len(chunks)} chunks from {len(files)} file(s). "
            "You can ask questions now."
        )

    # The existing store embeds new documents with its own embedding function.
    VS_HOLDER.add_documents(chunks)
    return f"Updated index: +{len(chunks)} chunks from {len(files)} file(s)."


def make_chain():
    """Build a RetrievalQA chain over the current index.

    Returns:
        The chain, or ``None`` when nothing has been indexed yet.
    """
    if VS_HOLDER is None:
        return None

    system_prompt = (
        "You are a helpful assistant. Answer ONLY from the provided context. "
        "If unsure, say you don't know. End with 'Sources:' and cite filenames when possible."
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        # Real newlines here: the original "\\n" put literal backslash-n
        # sequences into the text sent to the model.
        ("human", "Question: {question}\n\nContext:\n{context}"),
    ])
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=VS_HOLDER.as_retriever(search_kwargs={"k": 3}),
        chain_type="stuff",
        chain_type_kwargs={"prompt": prompt, "document_variable_name": "context"},
        return_source_documents=True,
    )


def chat_fn(message, history):
    """Answer one chat message from the indexed documents.

    Args:
        message: The user's question.
        history: Chat history supplied by ``gr.ChatInterface`` (unused).

    Returns:
        The answer text, or a hint when the key or the index is missing.
    """
    if not os.getenv("OPENAI_API_KEY"):
        return _MISSING_KEY_MSG

    chain = make_chain()
    if chain is None:
        return "Upload files to auto-build the index, then ask your question."

    # invoke() replaces the deprecated Chain.__call__ form.
    out = chain.invoke({"query": message})
    return out["result"]


# ---- UI ----
# Evaluated once at import: the key box is shown only when no key is preset.
has_key = bool(os.getenv("OPENAI_API_KEY"))

with gr.Blocks(title="L1Bot — Auto-index on upload") as demo:
    gr.Markdown(
        "## 🛟 L1Bot — Ask questions on your files\n"
        "Upload **.txt/.pdf** and the index builds automatically. Then just ask."
    )

    if not has_key:
        with gr.Row():
            api = gr.Textbox(
                label="OpenAI API Key",
                type="password",
                placeholder="sk-... (set once per Space run)",
            )
            set_btn = gr.Button("Set Key")
        set_btn.click(set_api_key, inputs=[api], outputs=[])

    files = gr.File(
        label="Upload TXT/PDF (multiple allowed) — auto-indexes",
        file_count="multiple",
        file_types=[".txt", ".pdf"],
    )
    status = gr.Markdown("")

    gr.Markdown("---")
    chat = gr.ChatInterface(
        fn=chat_fn,
        title="Ask about your documents",
        description="Answers are grounded in your uploaded files.",
    )

    # Listeners must be registered inside the Blocks context.
    files.change(auto_index, inputs=[files], outputs=[status])