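# app.py: Challenge Copilot, a small RAG Q&A bot over challenge_context.txt,
# built with LlamaIndex + OpenAI and served through a Gradio ChatInterface.
# Assumed dependencies (not listed in the original source): gradio, llama-index,
# llama-index-llms-openai, llama-index-embeddings-openai.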
import os
from pathlib import Path

import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
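
# Configuration: every value below can be overridden with an environment variable.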
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
TOP_K = int(os.getenv("TOP_K", "3"))
DOC_PATH = Path(os.getenv("DOC_PATH", "challenge_context.txt"))
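
# Guardrail instructions prepended to every query so answers stay grounded in the document.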
SYSTEM_GUARDRAILS = (
    "You are Challenge Copilot. Answer ONLY using the provided context. "
    "If the answer is not in the context, say: 'I don’t know based on the current document.' "
    "Then ask the user to add the missing official details to challenge_context.txt."
)
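
# Module-level cache: the index is built once per process and reused across chats.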
_INDEX = None
_QUERY_ENGINE = None


def build_index():
    """Build (or return the cached) LlamaIndex query engine over DOC_PATH."""
    global _INDEX, _QUERY_ENGINE
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE
    # Fail fast with an actionable message if the API key is not configured.
    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
        )
    # Seed a placeholder document so the Space still boots on a fresh clone.
    if not DOC_PATH.exists():
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )
    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120
    # Read .txt files from the document's directory, then keep only DOC_PATH itself.
    data_dir = str(DOC_PATH.parent)
    docs = SimpleDirectoryReader(
        input_dir=data_dir,
        required_exts=[".txt"],
        recursive=False,
    ).load_data()
    docs = [d for d in docs if d.metadata.get("file_name") == DOC_PATH.name]
    if not docs:
        raise FileNotFoundError(f"Could not load {DOC_PATH.name}. Make sure it exists in the repo.")
    _INDEX = VectorStoreIndex.from_documents(docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE


def format_sources(resp, max_sources=3, max_chars=220):
    """Format up to max_sources retrieved chunks as a numbered list with similarity scores."""
    lines = []
    for i, sn in enumerate(getattr(resp, "source_nodes", [])[:max_sources], start=1):
        fn = sn.node.metadata.get("file_name", "unknown")
        snippet = sn.node.get_content().replace("\n", " ").strip()[:max_chars]
        score = getattr(sn, "score", None)
        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
        lines.append(f"{i}. {fn}{score_txt}: {snippet}...")
    return "\n".join(lines) if lines else "No sources returned."


def chat(message, history):
    """Gradio ChatInterface callback: answer one user message with retrieval-grounded context."""
    qe = build_index()
    prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {message}\n"
        "Answer using ONLY the context."
    )
    resp = qe.query(prompt)
    answer = str(resp).strip()
    # Source citations can be hidden by setting SHOW_SOURCES=false.
    show_sources = os.getenv("SHOW_SOURCES", "true").lower() == "true"
    if show_sources:
        answer += "\n\n---\nSources:\n" + format_sources(resp, max_sources=TOP_K)
    return answer


# ---- UI ----
try:
    theme_obj = gr.themes.Soft()
except Exception:
    theme_obj = None  # compatibility fallback

with gr.Blocks(theme=theme_obj) as demo:
    gr.Markdown("# Challenge Copilot — RAG Q&A Bot")
    gr.Markdown(
        "Ask questions about the Building AI Application Challenge using "
        "challenge_context.txt (LlamaIndex + OpenAI)."
    )
    gr.ChatInterface(
        fn=chat,
        examples=[
            "What will I build in this live session?",
            "Who is this best for?",
            "What are the prerequisites?",
        ],
    )
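
# Local entry point: `python app.py` starts the Gradio server; on a Hugging Face
# Space the Gradio SDK typically serves the `demo` object directly instead.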
if __name__ == "__main__":
    demo.launch()