decodingdatascience committed on
Commit
3ea7b4f
·
verified ·
1 Parent(s): a71050a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import gradio as gr
4
+
5
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
6
+ from llama_index.llms.openai import OpenAI
7
+ from llama_index.embeddings.openai import OpenAIEmbedding
8
+
9
+
10
# ======================
# Config (safe defaults)
# ======================
# Chat model used for answering; override via the OPENAI_MODEL env var.
MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
# Embedding model used to vectorize document chunks for retrieval.
EMBED_MODEL = os.getenv("OPENAI_EMBED_MODEL", "text-embedding-3-small")
# Number of top-scoring chunks retrieved per query.
# NOTE(review): int() will raise ValueError if TOP_K is set to a non-integer.
TOP_K = int(os.getenv("TOP_K", "3"))

# Your knowledge base file in the Space repo
DOC_PATH = Path(os.getenv("DOC_PATH", "challenge_context.txt"))

# Prompt preamble prepended to every query; restricts answers to the
# retrieved context and tells the model how to respond when it can't answer.
SYSTEM_GUARDRAILS = (
    "You are Challenge Copilot. Answer ONLY using the provided context. "
    "If the answer is not in the context, say: 'I don’t know based on the current document.' "
    "Then ask the user to add the missing official details to challenge_context.txt."
)
25
+
26
+
27
# ======================
# Build index (cached)
# ======================
# Module-level cache so the index is built at most once per process.
_INDEX = None
_QUERY_ENGINE = None

def build_index():
    """Build (once) and return the cached LlamaIndex query engine.

    Returns:
        The query engine configured with ``similarity_top_k=TOP_K``.

    Raises:
        RuntimeError: if the OPENAI_API_KEY environment variable is unset.
        FileNotFoundError: if the knowledge-base file could not be loaded.
    """
    global _INDEX, _QUERY_ENGINE

    # Fast path: a previous call already constructed the engine.
    if _QUERY_ENGINE is not None:
        return _QUERY_ENGINE

    if not os.getenv("OPENAI_API_KEY"):
        raise RuntimeError(
            "OPENAI_API_KEY is missing. Add it in the Space Settings → Variables and secrets."
        )

    # Create a placeholder so the Space boots even if you forgot the file
    if not DOC_PATH.exists():
        DOC_PATH.write_text(
            "Add the official Building AI Application Challenge content here.\n",
            encoding="utf-8",
        )

    # Configure LlamaIndex globally before any indexing work happens.
    Settings.llm = OpenAI(model=MODEL, temperature=0.2)
    Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL)
    Settings.chunk_size = 800
    Settings.chunk_overlap = 120

    # SimpleDirectoryReader expects a directory, so point it at the parent folder.
    loaded = SimpleDirectoryReader(
        input_dir=str(DOC_PATH.parent),
        required_exts=[".txt"],
        recursive=False,
    ).load_data()

    # Keep only the target knowledge-base file; other .txt files are ignored.
    target_docs = [doc for doc in loaded if doc.metadata.get("file_name") == DOC_PATH.name]
    if not target_docs:
        raise FileNotFoundError(f"Could not load {DOC_PATH.name}. Make sure it exists in the repo.")

    _INDEX = VectorStoreIndex.from_documents(target_docs)
    _QUERY_ENGINE = _INDEX.as_query_engine(similarity_top_k=TOP_K)
    return _QUERY_ENGINE
74
+
75
+
76
def format_sources(resp, max_sources=3, max_chars=220):
    """Render up to *max_sources* retrieval sources as a numbered list.

    Args:
        resp: A query response; source nodes are read from ``resp.source_nodes``
            (missing attribute is treated as no sources).
        max_sources: Maximum number of sources to render.
        max_chars: Maximum snippet length before truncation.

    Returns:
        A newline-joined string of numbered source lines, or the literal
        "No sources returned." when there are none.
    """
    lines = []
    for i, sn in enumerate(getattr(resp, "source_nodes", [])[:max_sources], start=1):
        fn = sn.node.metadata.get("file_name", "unknown")
        # Flatten newlines so each source stays on one display line.
        content = sn.node.get_content().replace("\n", " ").strip()
        snippet = content[:max_chars]
        # Bug fix: only append an ellipsis when the snippet was actually
        # truncated (the original appended "..." unconditionally).
        if len(content) > max_chars:
            snippet += "..."
        score = getattr(sn, "score", None)
        score_txt = f" (score={score:.3f})" if isinstance(score, (float, int)) else ""
        lines.append(f"{i}. {fn}{score_txt}: {snippet}")
    return "\n".join(lines) if lines else "No sources returned."
85
+
86
+
87
def chat(message, history):
    """Gradio chat handler: answer *message* using only the indexed document.

    *history* is supplied by gr.ChatInterface but is intentionally unused —
    every question is answered statelessly against the document context.
    """
    engine = build_index()

    guarded_prompt = (
        f"{SYSTEM_GUARDRAILS}\n\n"
        f"User question: {message}\n"
        f"Answer using ONLY the context."
    )
    response = engine.query(guarded_prompt)
    answer = str(response).strip()

    # Citation footer is on by default; set SHOW_SOURCES=false to hide it.
    if os.getenv("SHOW_SOURCES", "true").lower() == "true":
        answer = answer + "\n\n---\nSources:\n" + format_sources(response, max_sources=TOP_K)

    return answer
103
+
104
+
105
# Chat UI wired to chat(); gr.ChatInterface calls it once per user message
# with (message, history) and renders the returned string.
demo = gr.ChatInterface(
    fn=chat,
    title="Challenge Copilot — RAG Q&A Bot",
    description="Ask questions about the Building AI Application Challenge using challenge_context.txt (LlamaIndex + OpenAI).",
    # Example prompts shown beneath the chat box as one-click starters.
    examples=[
        "What will I build in this live session?",
        "Who is this best for?",
        "What are the prerequisites?"
    ],
    theme="soft"
)

# Launch the app only when run as a script (Spaces executes app.py directly).
if __name__ == "__main__":
    demo.launch()