Spaces:

decodingdatascience
/

Challengebot

Running

App Files Community

decodingdatascience committed 1 day ago

Commit

98278f3

verified ·

1 Parent(s): 336d701

Create app2.py

Browse files

Files changed (1) hide show

app2.py +257 -0

app2.py ADDED Viewed

	@@ -0,0 +1,257 @@

+import os
+from pathlib import Path
+import gradio as gr
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
+from llama_index.llms.openai import OpenAI
+from llama_index.embeddings.openai import OpenAIEmbedding
+# ======================
+# Config (safe defaults)
+# ======================
+MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
+EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
+TOP_K = int(os.getenv("TOP_K", "3"))
+# Your knowledge base file in the Space repo
+DOC_PATH = Path(os.getenv("DOC_PATH", "challenge_context.txt"))
+# DDS logo (raw GitHub URL)
+LOGO_URL = os.getenv(
+    "LOGO_URL",
+    "https://github.com/Decoding-Data-Science/airesidency/blob/main/dds_logo.jpg?raw=true",
+)
+SYSTEM_GUARDRAILS = (
+    "You are Challenge Copilot. Answer ONLY using the provided context. "
+    "If the answer is not in the context, say: 'I don’t know based on the current document.' "
+    "Then ask the user to add the missing official details to challenge_context.txt."
+)
+APP_TITLE = "Challenge Copilot — RAG Q&A Bot"
+APP_SUBTITLE = (
+    "A simple Retrieval-Augmented Generation (RAG) chatbot that answers questions about the "
+    "Building AI Application Challenge using challenge_context.txt (LlamaIndex + OpenAI)."
+)
+# ======================
+# Build index (cached)
+# ======================
+_INDEX = None
+_QUERY_ENGINE = None
+def build_index():
+    global _INDEX, _QUERY_ENGINE
+    if _QUERY_ENGINE is not None:
+        return _QUERY_ENGINE
+    if not os.getenv("OPENAI_API_KEY"):
+        raise RuntimeError(
+            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
+        )
+    if not DOC_PATH.exists():
+        DOC_PATH.write_text(
+            "Add the official Building AI Application Challenge content here.\n",
+            encoding="utf-8",
+        )
+    # LlamaIndex global settings
+    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
+    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
+    Settings.chunk_size = 800
+    Settings.chunk_overlap = 120
+    # Reader expects a directory
+    data_dir = str(DOC_PATH.parent)
+    docs = SimpleDirectoryReader(
+        input_dir=data_dir,
+        required_exts=[".txt"],
+        recursive=False
+    ).load_data()
+    # Only index the target file
+    docs = [d for d in docs if d.metadata.get("file_name") == DOC_PATH.name]
+    if not docs:
+        raise FileNotFoundError(f"Could not load {DOC_PATH.name}. Make sure it exists in the repo.")
+    _INDEX = VectorStoreIndex.from_documents(docs)
+    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
+    return _QUERY_ENGINE
+def format_sources(resp, max_sources=3, max_chars=240):
+    lines = []
+    for i, sn in enumerate(getattr(resp, "source_nodes", [])[:max_sources], start=1):
+        fn = sn.node.metadata.get("file_name", "unknown")
+        snippet = sn.node.get_content().replace("\n", " ").strip()[:max_chars]
+        score = getattr(sn, "score", None)
+        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
+        lines.append(f"{i}. {fn}{score_txt}: {snippet}...")
+    return "\n".join(lines) if lines else "No sources returned."
+def chat(message, history):
+    qe = build_index()
+    prompt = (
+        f"{SYSTEM_GUARDRAILS}\n\n"
+        f"User question: {message}\n"
+        f"Answer using ONLY the context."
+    )
+    resp = qe.query(prompt)
+    answer = str(resp).strip()
+    show_sources = os.getenv("SHOW_SOURCES", "true").lower() == "true"
+    if show_sources:
+        answer += "\n\n---\n**Sources:**\n" + format_sources(resp, max_sources=TOP_K)
+    return answer
+# ======================
+# UI (professional layout)
+# ======================
+# Custom stylesheet handed to gr.Blocks via css=CSS. The dds-* class names are
+# the ones used by the gr.HTML snippets in the layout.
+CSS = """
+/* Layout polish */
+.dds-header { display:flex; align-items:center; gap:16px; }
+.dds-logo img { height:60px; width:auto; border-radius:10px; box-shadow: 0 2px 10px rgba(0,0,0,0.10); }
+.dds-title { margin:0; line-height:1.1; }
+.dds-subtitle { margin:6px 0 0 0; color: #555; }
+.dds-card { border: 1px solid rgba(0,0,0,0.08); border-radius: 14px; padding: 14px; background: rgba(255,255,255,0.7); }
+.dds-section-title { margin: 0 0 6px 0; }
+.dds-muted { color: #666; font-size: 0.95rem; }
+"""
+# Theme fallback (no theme passed to ChatInterface itself):
+# if constructing the Soft theme raises for any reason, theme_obj becomes None
+# and that value is what gets passed to gr.Blocks(theme=...).
+try:
+    theme_obj = gr.themes.Soft()
+except Exception:
+    theme_obj = None
+# Page layout: header row (logo + title), a separator, a two-column row
+# (chat on the left, FAQ accordions on the right), then the admin notes.
+with gr.Blocks(theme=theme_obj, css=CSS, title=APP_TITLE) as demo:
+    # Header row (Logo left + Title right)
+    with gr.Row():
+        with gr.Column(scale=1, min_width=140):
+            # Use HTML for reliable remote image rendering
+            # NOTE: LOGO_URL is interpolated into the markup as-is (no escaping).
+            gr.HTML(
+                f"""
+                <div class="dds-logo">
+                    <img src="{LOGO_URL}" alt="DDS Logo"/>
+                </div>
+                """
+            )
+        with gr.Column(scale=6):
+            # Title, subtitle and a usage tip, built from APP_TITLE / APP_SUBTITLE.
+            gr.HTML(
+                f"""
+                <div class="dds-header">
+                    <div>
+                        <h2 class="dds-title">{APP_TITLE}</h2>
+                        <p class="dds-subtitle">{APP_SUBTITLE}</p>
+                        <p class="dds-muted">
+                            Tip: If an answer is missing, add more official details to <b>challenge_context.txt</b> and restart the Space.
+                        </p>
+                    </div>
+                </div>
+                """
+            )
+    gr.Markdown("---")
+    # Two professional sections
+    with gr.Row():
+        # Section 1: Chat
+        with gr.Column(scale=6):
+            gr.HTML(
+                """
+                <div class="dds-card">
+                    <h3 class="dds-section-title">Section 1 — Ask the Copilot</h3>
+                    <p class="dds-muted">RAG flow: retrieve relevant chunks → generate a grounded answer using your LLM API.</p>
+                </div>
+                """
+            )
+            # ChatInterface (NO theme kwarg here)
+            # Each submitted message is handled by chat(message, history).
+            gr.ChatInterface(
+                fn=chat,
+                examples=[
+                    "What will I build in this live session?",
+                    "Who is this best for?",
+                    "What are the prerequisites?",
+                    "What is the RAG flow in this project?"
+                ],
+            )
+        # Section 2: FAQ
+        with gr.Column(scale=4):
+            gr.HTML(
+                """
+                <div class="dds-card">
+                    <h3 class="dds-section-title">Section 2 — FAQ</h3>
+                    <p class="dds-muted">Common issues + quick fixes for deployment and content quality.</p>
+                </div>
+                """
+            )
+            # Each FAQ entry is a collapsed accordion holding a Markdown list.
+            # .strip() drops the leading newline and trailing indentation of
+            # the triple-quoted text before it is rendered.
+            with gr.Accordion("FAQ 1 — The bot says “I don’t know”", open=False):
+                gr.Markdown(
+                    """
+- This means the answer is **not present** in `challenge_context.txt`.
+- Add the missing official content (rules, checkpoints, prizes, submission format, dates).
+- Commit the updated TXT and **restart** the Space.
+                    """.strip()
+                )
+            with gr.Accordion("FAQ 2 — OPENAI_API_KEY missing", open=False):
+                gr.Markdown(
+                    """
+- Go to your Space → **Settings → Variables and secrets**
+- Add: `OPENAI_API_KEY`
+- Save (Space restarts automatically).
+                    """.strip()
+                )
+            with gr.Accordion("FAQ 3 — Sources are not showing", open=False):
+                gr.Markdown(
+                    """
+- Ensure `SHOW_SOURCES=true` in Space variables (or leave it unset; default is true).
+- Increase `TOP_K` if you want more retrieved chunks.
+                    """.strip()
+                )
+            with gr.Accordion("FAQ 4 — Improve answer quality", open=False):
+                gr.Markdown(
+                    """
+- Add more structured content into your TXT (headings + bullet points).
+- Keep each checkpoint/rule as a clear section.
+- Increase `TOP_K` slightly (e.g., 4–6) if context is larger.
+                    """.strip()
+                )
+            with gr.Accordion("FAQ 5 — App fails on startup", open=False):
+                gr.Markdown(
+                    """
+- Check Space logs.
+- Most common causes:
+  - Missing `challenge_context.txt` in repo
+  - Missing `OPENAI_API_KEY`
+  - Dependency mismatch (simplify `requirements.txt`)
+                    """.strip()
+                )
+    gr.Markdown("---")
+    # Footer: quick reference of the file and environment variables the app reads.
+    gr.Markdown(
+        """
+**Admin notes**
+- Context file: `challenge_context.txt`
+- Model env vars: `OPENAI_MODEL`, `OPENAI_EMBED_MODEL`
+- Retrieval env vars: `TOP_K`
+- Sources toggle: `SHOW_SOURCES=true|false`
+        """.strip()
+    )
+# Launch the app only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    demo.launch()