ALVHB95 committed
Commit 99a1538 · 1 Parent(s): 76b4e6a
Files changed (1)
  1. app.py +38 -33
app.py CHANGED
@@ -1,11 +1,10 @@
  """
  =========================================================
  app.py — Green Greta (Gradio + TF/Keras 3 + LangChain 0.3)
- - Chat tab uses Blocks + Chatbot(height=...)
- - LLM: meta-llama/Meta-Llama-3.1-8B-Instruct
- - RAG: e5-base-v2 + (BM25+Vector) with safe fallback + Multi-Query + reranker ✅
- - Language selector: Auto, English, German, French, Italian, Portuguese, Hindi, Spanish, Thai
- - No JSON output leakage ✅
+ - Chat tab: Blocks + Chatbot(height=...)
+ - LLM: meta-llama/Meta-Llama-3.1-8B-Instruct
+ - RAG: e5-base-v2 + (BM25+Vector) with fallback + Multi-Query + reranker ✅
+ - Answers in the selected language (no extra keys passed to the chain)
  =========================================================
  """

@@ -13,14 +12,14 @@ import os
  import json
  import shutil

- # --- Env / telemetry (set before imports that use them) ---
+ # --- Env / telemetry (before imports that use them) ---
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
  os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
  os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")
  os.environ.setdefault("ANONYMIZED_TELEMETRY", "false")
  os.environ.setdefault("CHROMA_TELEMETRY_ENABLED", "FALSE")
  os.environ.setdefault("USER_AGENT", "green-greta/1.0 (+contact-or-repo)")
- # Optional: reproducible CPU math (silences some TF logs)
+ # Optional: more stable TF CPU results
  # os.environ.setdefault("TF_ENABLE_ONEDNN_OPTS", "0")

  import gradio as gr
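
Because these libraries read the variables at import time, the `os.environ.setdefault` calls must run before `import gradio` and friends; `setdefault` also leaves any value exported from the shell untouched. A minimal sketch of the pattern:

    import os

    # setdefault only fills the variable when it is unset, so an explicit
    # `export TOKENIZERS_PARALLELISM=true` from the shell still wins.
    os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
    print(os.environ["TOKENIZERS_PARALLELISM"])  # "false" unless overridden
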
@@ -36,7 +35,7 @@ except Exception:
  user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
  header_template = {"User-Agent": user_agent}

- # --- LangChain core / RAG ---
+ # --- LangChain / RAG ---
  from langchain_text_splitters import RecursiveCharacterTextSplitter
  from langchain_core.prompts import ChatPromptTemplate
  from langchain.chains import ConversationalRetrievalChain
@@ -46,19 +45,18 @@ from langchain_community.vectorstores import Chroma

  # Embeddings
  try:
-     from langchain_huggingface import HuggingFaceEmbeddings  # pip install -U langchain-huggingface
+     from langchain_huggingface import HuggingFaceEmbeddings
  except ImportError:
      from langchain_community.embeddings import HuggingFaceEmbeddings

  # Retrieval utilities
  from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
- from langchain.retrievers.document_compressors import CrossEncoderReranker
  from langchain.retrievers.multi_query import MultiQueryRetriever
-
+ from langchain.retrievers.document_compressors import CrossEncoderReranker
  from langchain_community.retrievers import BM25Retriever
  from langchain_community.cross_encoders import HuggingFaceCrossEncoder

- # HF Hub for SavedModel
+ # HF Hub
  from huggingface_hub import snapshot_download

  # LLM via HF Inference
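
The try/except import mirrors LangChain's package split: `langchain_huggingface` is the maintained integration package, while `langchain_community` still ships an older copy of the same class. A quick way to check which one was picked up (illustrative, not part of the commit):

    try:
        from langchain_huggingface import HuggingFaceEmbeddings
    except ImportError:  # older environments without the split-out package
        from langchain_community.embeddings import HuggingFaceEmbeddings

    # Either import exposes the same constructor surface used below.
    print(HuggingFaceEmbeddings.__module__)
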
@@ -128,7 +126,7 @@ base_splitter = RecursiveCharacterTextSplitter(
  )
  docs = base_splitter.split_documents(all_loaded_docs)

- # Embeddings (better recall)
+ # Embeddings
  embeddings = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")

  # Vector store
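
Usage sketch for the chosen embedding model. The e5 family was trained with "query: " / "passage: " prefixes; adding them by hand, as below, is our assumption and not something this commit does:

    emb = HuggingFaceEmbeddings(model_name="intfloat/e5-base-v2")
    q_vec = emb.embed_query("query: how do I recycle glass?")
    d_vecs = emb.embed_documents(["passage: glass goes in the green container"])
    print(len(q_vec), len(d_vecs[0]))  # e5-base-v2 produces 768-dim vectors
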
@@ -143,10 +141,10 @@ vectordb = Chroma.from_documents(
  # Vector retriever
  vec_retriever = vectordb.as_retriever(search_kwargs={"k": 8}, search_type="mmr")

- # BM25 + Ensemble with safe fallback if rank-bm25 isn't installed
+ # BM25 + Ensemble with fallback if rank-bm25 is missing
  use_bm25 = True
  try:
      bm25 = BM25Retriever.from_documents(docs)  # requires rank-bm25
      bm25.k = 8
  except Exception as e:
      print(f"[RAG] BM25 unavailable ({e}). Falling back to vector-only retriever.")
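
The `else:` branch in the next hunk shows only the vector-only fallback; assembled, the safe-fallback wiring plausibly looks like the sketch below. The 50/50 weights are an illustrative assumption, not values taken from app.py:

    if use_bm25:
        # Hybrid retrieval: lexical BM25 plus dense MMR results, merged by
        # EnsembleRetriever's weighted reciprocal-rank fusion.
        base_retriever = EnsembleRetriever(
            retrievers=[bm25, vec_retriever],
            weights=[0.5, 0.5],  # assumed split; tune per corpus
        )
    else:
        base_retriever = vec_retriever
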
@@ -159,14 +157,14 @@ else:
      base_retriever = vec_retriever

  # ======================================
- # 3) PROMPT (with target language variable)
+ # 3) PROMPT (no extra variables: only {context} and {question})
+ # We instruct the model to obey a language prefix in the question itself.
  # ======================================
  SYSTEM_TEMPLATE = (
-     "You are Greta, a bilingual recycling & sustainability assistant.\n"
-     "- Always answer in the *target language*: {target_language}.\n"
-     "- If target_language is 'Auto', detect the user's language and answer in that language.\n"
-     "- Be direct, practical, and base your answer only on the snippets below; if they are insufficient, say so and propose actionable next steps.\n"
-     "- Do not reveal or mention 'snippets' or internal tools.\n\n"
+     "You are Greta, a recycling & sustainability assistant. "
+     "Follow any explicit language directive at the start of the question, e.g., "
+     "‘Answer ONLY in Spanish.’ If there is no directive, detect the user's language and answer accordingly. "
+     "Be direct and practical. If the snippets are insufficient, say so and suggest actionable next steps.\n\n"
      "{context}\n\n"
      "Question: {question}"
  )
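
Dropping `{target_language}` matters because `ConversationalRetrievalChain` fills in only `{context}` and `{question}` on the combine-docs prompt; anything else has to be threaded in by the caller. A small check of the template's variables (illustrative):

    from langchain_core.prompts import ChatPromptTemplate

    prompt = ChatPromptTemplate.from_template(SYSTEM_TEMPLATE)
    # Only the two variables the chain itself supplies remain.
    assert set(prompt.input_variables) == {"context", "question"}
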
@@ -197,10 +195,10 @@ memory = ConversationBufferMemory(
      return_messages=True,
  )

- # Multi-Query boosts recall by generating paraphrases
+ # Multi-Query (paraphrases the query)
  mqr = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=llm, include_original=True)

- # Cross-encoder reranker (lighter/faster than large)
+ # Reranker (base cross-encoder)
  cross_encoder = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
  reranker = CrossEncoderReranker(model=cross_encoder, top_n=4)
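
These two pieces typically compose through a `ContextualCompressionRetriever`, so the cross-encoder rescoring applies to the multi-query candidate pool. The exact wiring sits in unchanged lines of app.py, so treat this as a plausible sketch:

    compression_retriever = ContextualCompressionRetriever(
        base_compressor=reranker,  # keeps the top_n=4 passages after rescoring
        base_retriever=mqr,        # paraphrased queries over the hybrid retriever
    )
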
@@ -220,13 +218,21 @@ qa_chain = ConversationalRetrievalChain.from_llm(
      return_source_documents=False,
  )

+ # ===== Helper: build the language directive into the question itself =====
+ def _lang_directive(lang: str) -> str:
+     if not lang or lang.strip().lower() == "auto":
+         return "Detect the user's language and answer in that language."
+     return f"Answer ONLY in {lang}."
+
  def chat_interface(question: str, history, target_language: str = "Auto"):
-     """Wrap the RAG chain to return a clean text answer in the requested language."""
+     """Return a clean answer in the requested language WITHOUT passing extra keys to the chain."""
      try:
-         result = qa_chain.invoke({"question": question, "target_language": target_language})
+         directive = _lang_directive(target_language)
+         combined_q = f"{directive}\n\n{question}"
+         result = qa_chain.invoke({"question": combined_q})
          answer = result.get("answer", "")
          if not answer:
-             return "Sorry, I couldn't produce a useful answer from the available information."
+             return "Sorry, I couldn't generate a useful answer from the available information."
          return answer
      except Exception as e:
          return (
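
Instead of a `target_language` input key, which this chain's memory setup can't handle cleanly, the directive now travels inside the question string itself. The helper's behaviour, by way of example:

    assert _lang_directive("Auto") == (
        "Detect the user's language and answer in that language."
    )
    assert _lang_directive("German") == "Answer ONLY in German."
    # What the chain actually receives:
    # "Answer ONLY in German.\n\nHow do I recycle batteries?"
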
@@ -267,7 +273,7 @@ banner_tab = gr.Markdown(banner_tab_content)

  SUPPORTED_LANGS = ["Auto", "English", "German", "French", "Italian", "Portuguese", "Hindi", "Spanish", "Thai"]

- # CSS: make chat area taller and widen app a bit
+ # CSS: enlarge the chat area and overall width
  custom_css = """
  .gradio-container { max-width: 1200px !important; }
  #greta-chat { height: 700px !important; }
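
For the `#greta-chat` rule to take effect, the Chatbot component has to carry that `elem_id`; the component definition lives in unchanged lines, so this is an assumed shape:

    chat = gr.Chatbot(elem_id="greta-chat", height=700)
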
@@ -276,16 +282,15 @@ custom_css = """
  """

  def _user_submit(user_msg, history):
-     """Append user turn; bot fills later."""
+     """Append the user's turn; the bot replies afterwards."""
      if not user_msg:
          return "", history
      history = history + [[user_msg, None]]
      return "", history

  def _bot_respond(history, target_language):
-     """Generate bot answer for the last user turn in the requested language."""
+     """Generate the bot's answer in the requested language."""
      user_msg = history[-1][0]
-     # Pass previous history to our RAG function (excluding the current empty bot turn)
      answer = chat_interface(user_msg, history[:-1], target_language=target_language or "Auto")
      history[-1][1] = answer
      return history
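
The two handlers implement the usual Gradio two-step pattern over list-of-pairs history: `_user_submit` appends `[user_msg, None]` as a placeholder and clears the textbox, then `_bot_respond` fills the `None` in place. For instance:

    history = []
    _, history = _user_submit("How do I recycle glass?", history)
    assert history == [["How do I recycle glass?", None]]  # bot slot still empty
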
@@ -303,7 +308,7 @@ with gr.Blocks(theme=theme, css=custom_css) as chatbot_gradio_app:
  undo = gr.Button("↩︎ Undo")
  clear = gr.Button("🗑 Clear")

- # Submit via button or Enter (pass language value into the responder)
+ # Submit via button or Enter (the language selection is passed to the responder)
  send.click(_user_submit, [msg, chat], [msg, chat], queue=False).then(
      _bot_respond, [chat, lang_sel], [chat]
  )
@@ -311,11 +316,11 @@ with gr.Blocks(theme=theme, css=custom_css) as chatbot_gradio_app:
      _bot_respond, [chat, lang_sel], [chat]
  )

- # Utilities respect current language selection too
+ # Utilities
  clear.click(lambda: [], None, chat, queue=False)
  undo.click(lambda h: h[:-1] if h else h, chat, chat, queue=False)
  retry.click(
-     lambda h: (h[:-1] + [[h[-1][0], None]]) if h else h,  # re-ask last user msg
+     lambda h: (h[:-1] + [[h[-1][0], None]]) if h else h,  # retry the last question
      chat, chat, queue=False
  ).then(_bot_respond, [chat, lang_sel], [chat])
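
The retry lambda only strips the last bot answer while keeping the user message, so the chained `_bot_respond` regenerates it; e.g.:

    h = [["How do I compost?", "old answer"]]
    assert (h[:-1] + [[h[-1][0], None]]) == [["How do I compost?", None]]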