File size: 6,487 Bytes
c7b8fe1
e87b1c3
c7b8fe1
 
4e4df74
e87b1c3
db6743d
 
 
4e4df74
de611b5
e87b1c3
859ac01
e87b1c3
 
c7b8fe1
 
 
 
e87b1c3
de611b5
 
e87b1c3
de611b5
c7b8fe1
db6743d
e87b1c3
c7b8fe1
db6743d
 
 
0ff301d
e87b1c3
c7b8fe1
e87b1c3
 
 
c7b8fe1
e87b1c3
 
 
c7b8fe1
 
 
 
 
 
 
 
 
e87b1c3
 
 
 
 
 
 
c7b8fe1
d94ed24
c7b8fe1
e87b1c3
4e4df74
e87b1c3
cfe8e30
e87b1c3
57ec1a6
 
c7b8fe1
cfe8e30
57ec1a6
db6743d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40b4466
de611b5
cfe8e30
de611b5
cfe8e30
c7b8fe1
 
 
 
7eb1131
 
 
db6743d
c7b8fe1
 
137ec55
 
 
c7b8fe1
 
 
cfe8e30
c7b8fe1
 
db6743d
 
 
c7b8fe1
 
 
cfe8e30
2d854de
6559d1f
2d854de
db6743d
2d854de
a7e2073
 
 
2d854de
 
 
 
4e4df74
cfe8e30
2d854de
cfe8e30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# --------------------------------------------------------------
# app.py
# --------------------------------------------------------------
import os, time, importlib.util
import gradio as gr
from huggingface_hub import hf_hub_download
from datetime import datetime
from datetime import date
from upstash_redis import Redis

# ----------------------------------------------------------------------
# Helper to read secrets from the HF Space environment
# ----------------------------------------------------------------------
def _secret(key: str, fallback: str = None) -> str:
    """
    Look up a secret in the process environment.

    Args:
        key: Name of the environment variable to read.
        fallback: Value to hand back when the variable is not set.

    Returns:
        The variable's value when it is set (an empty string counts as
        set), otherwise ``fallback`` when one was supplied.

    Raises:
        RuntimeError: The variable is unset and no fallback was given.
    """
    # The first candidate that is not None wins, so the environment always
    # takes precedence over the fallback.
    for candidate in (os.getenv(key), fallback):
        if candidate is not None:
            return candidate
    raise RuntimeError(f"Secret '{key}' not found. Please add it to your Space secrets.")

# ----------------------------------------------------------------------
# 1. Configuration & Constants
# ----------------------------------------------------------------------
# Repository the assets are fetched from (required secret).
REPO_ID = _secret("REPO_ID")

# Files pulled from the repository during bootstrap.
FILES_TO_DOWNLOAD = [
    "index.faiss",
    "index.pkl",
    "agent_logic.py",
    "prec_hyde_agent.txt",
    "prec_rag_agent.txt",
]

# Local folder the files above are written to.
LOCAL_DOWNLOAD_DIR = "downloaded_assets"

# Embedding model name handed to the RAG engine.
EMBEDDING_MODEL_NAME = "google/embeddinggemma-300m"

# Upstash Redis client; `respond` uses it for the "request_limit" counter.
redis = Redis(
    url=_secret("UPSTASH_REDIS_URL"),
    token=_secret("UPSTASH_TOKEN"),
)

# ----------------------------------------------------------------------
# 2. Bootstrap Phase – download assets and import the RAG engine
# ----------------------------------------------------------------------
# Everything below runs at import time, before the UI is built: the assets are
# fetched first because the engine is constructed from the download folder.
print("--- [UI App] Starting bootstrap process ---")
os.makedirs(LOCAL_DOWNLOAD_DIR, exist_ok=True)
# Required secret; _secret raises RuntimeError when it is missing.
hf_token = _secret("HF_TOKEN")

# Fetch each listed file from the dataset repo into LOCAL_DOWNLOAD_DIR.
for filename in FILES_TO_DOWNLOAD:
    print(f"--- [UI App] Downloading '{filename}'... ---")
    hf_hub_download(
        repo_id=REPO_ID,
        filename=filename,
        repo_type="dataset",
        local_dir=LOCAL_DOWNLOAD_DIR,
        token=hf_token,
    )

# Load the downloaded agent_logic.py as a module named "agent_logic" so that
# its RAG_Engine class can be instantiated below.
# NOTE(security): this executes code fetched from REPO_ID at startup; the
# repository contents must be trusted.
logic_script_path = os.path.join(LOCAL_DOWNLOAD_DIR, "agent_logic.py")
spec = importlib.util.spec_from_file_location("agent_logic", logic_script_path)
agent_logic_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(agent_logic_module)

# Single engine instance used by `respond` for every chat request.
engine = agent_logic_module.RAG_Engine(
    local_download_dir=LOCAL_DOWNLOAD_DIR,
    embedding_model_name=EMBEDDING_MODEL_NAME,
)

print("--- [UI App] Bootstrap complete. Gradio UI is starting. ---")

# ----------------------------------------------------------------------
# 3. Core Gradio Chat Logic
# ----------------------------------------------------------------------
def respond(message: str, history: list[dict[str, str]]):
    """
    Gradio chat callback: gate the request, then stream the engine's answer.

    This function is a generator (it contains ``yield``), so every
    user-visible message, including refusals and the error notice, must be
    yielded. A value given to ``return`` inside a generator is attached to
    ``StopIteration`` and is not seen by code that simply iterates it.

    Args:
        message: The latest user message.
        history: Conversation so far, passed unchanged to
            ``engine.get_response``.

    Yields:
        Either a single notice (expired, quota exhausted, or error), or
        progressively longer prefixes of the final answer to produce a
        typing effect.
    """
    try:
        # Refuse once the END_DATE secret (YYYY-MM-DD) lies in the past.
        end_date = datetime.strptime(_secret("END_DATE"), "%Y-%m-%d").date()
        if date.today() > end_date:
            yield "Chatbot sudah expired."
            return

        # The counter is decremented first and the resulting value checked,
        # so a negative result means the quota was already used up.
        remaining_requests = redis.decr("request_limit")
        if remaining_requests < 0:
            yield "Kuota chat sudah habis."
            return

        # All checks passed: obtain the complete answer from the engine.
        final_response = engine.get_response(message, history)

        # Re-emit the answer one character at a time; each yield carries the
        # whole text produced so far.
        response = ""
        for char in final_response:
            response += char
            time.sleep(0.01)
            yield response

    except Exception as e:
        # Top-level boundary for the UI: log the cause and show a generic
        # message instead of surfacing the exception to the user.
        print(f"Error in respond function: {e}")
        yield "Terjadi error saat memproses permintaan. Silakan coba lagi."

# ----------------------------------------------------------------------
# 4. UI Layout – Tips + Chat + Footer
# ----------------------------------------------------------------------
# 4.1  Tips – user-facing Markdown (Indonesian) passed to gr.Markdown in the
#      Blocks layout. It is a raw string, and the trailing double spaces on
#      its lines are Markdown hard line breaks, so keep them when editing.
tips_md = r"""
## 📋 Tips menggunakan **PRECISE RAG Agent**

- **Apa itu PRECISE RAG Agent?**  
  AI Agent yang menggunakan Retrieval‑Augmented Generation (RAG) untuk menjawab pertanyaan dari dokumentasi PRECISE (disimpan dalam FAISS storage).  
- **Perbedaan dengan chatbot sebelumnya :**  
  • Dengan menggunakan agentic RAG, agent hanya mengambil dokumentasi yang dibutuhkan.  
  • Karena efisiensi konteks dan efisiensi design, dapat menggunakan model kecerdasan rendah, sehingga cost turun sekitar 95% dibanding versi non RAG yang sebelumnya.  
- **Tips untuk menggunakan**  
  • Usahakan pertanyaan Anda spesifik agar jawaban lebih akurat.  
  • Jika jawaban kurang mengena, coba reset chat atau tanyakan ulang dengan bahasa berbeda.  
  • Dokumentasi chatbot mungkin berbeda penomoran-nya dengan dokumentasi yang Anda punya karena diefisiensi/diorganisasi untuk vector storage, sehingga jangan mereferensi nomor atau butir, tapi langsung sebutkan konsepnya.  
  • Jika konsep sangat luas atau terlalu kompleks, coba breakdown konsep ke bagian-bagiannya.  Usahakan satu pertanyaan mendalam yang dilengkapi konteks daripada banyak pertanyaan yang menyangkut banyak aspek berbeda.  

"""

# 4.2  Footer – component/model credits passed to gr.Markdown in the layout.
#      Each line that must start its own row ends with two spaces (a Markdown
#      hard line break); without them consecutive lines are joined into one.
footer_md = r"""
---  
**Komponen**: LangChain + FAISS  + Redis  
**Models**: Qwen3-4B-Thinking-2507, Qwen3-4B-Instruct-2507  
*Updated 25 Sep 2025 – YOI*  

"""

# 4.3  Chat component – wires `respond` into a ChatInterface. No description
#      is set here; the tips and footer Markdown are separate components.
# NOTE(review): the title hard-codes an expiry date, while `respond` enforces
# the END_DATE secret – confirm the two are kept in sync.
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    title="PRECISE RAG Agent (Expired 1 April 2026)",
    # Sample prompts offered in the UI (user-facing, Indonesian).
    examples=[
        ["Jelaskan konsep PRECISE secara sederhana."],
        ["Berapa keuntungan finansial yang didapat menggunakan PRECISE?"],
        ["Tuliskan implementasi logika LOESS menggunakan JAVA"]
    ],
    cache_examples=False,
    theme=gr.themes.Soft(),
)

# 4.4  Assemble everything inside a Blocks container, declared in this order:
#      chat, tips, footer.
# NOTE(review): `gr.Blocks()` is created without a theme while the
# ChatInterface is given `gr.themes.Soft()` – confirm which theme actually
# applies once the interface is rendered inside this container.
with gr.Blocks() as demo:
    chatbot.render()             # <-- Main chat UI (the ChatInterface built earlier)
    gr.Markdown(tips_md)          # <-- Tips section
    gr.Markdown(footer_md)        # <-- Footer

# ----------------------------------------------------------------------
# 5. Launch
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # One username/password pair for the login prompt, read from the Space
    # secrets; a missing secret stops startup with _secret's RuntimeError.
    credentials = (_secret("CHAT_USER"), _secret("CHAT_PASS"))
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        auth=credentials,
    )