jedick committed
Commit 5a19eb4 · 1 Parent(s): cc54402

Revert "Attempt fix for ChromaDB ValueError"

This reverts commit 9477d3a838873d357a7dba604e1675933fb649c8.

Files changed (6)
  1. app.py +9 -11
  2. graph.py +3 -0
  3. main.py +0 -8
  4. mods/tool_calling_llm.py +0 -4
  5. requirements.txt +0 -2
  6. retriever.py +3 -4
app.py CHANGED
@@ -4,7 +4,7 @@ from graph import BuildGraph
 from retriever import db_dir
 from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
-from main import openai_model, model_id, DownloadChatModel
+from main import openai_model, model_id
 from util import get_sources, get_start_end_months
 from mods.tool_calling_llm import extract_think
 import requests
@@ -82,6 +82,7 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
+            duration=15,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph
@@ -210,11 +211,6 @@ def to_workflow(request: gr.Request, *args):
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
-        # If graph hasn't been instantiated, download model before running workflow
-        graph = graph_instances[compute_mode].get(request.session_hash)
-        if graph is None:
-            DownloadChatModel()
-        # Call the workflow function with the @spaces.GPU decorator
         for value in run_workflow_local(*new_args):
             yield value
     if compute_mode == "remote":
@@ -222,7 +218,7 @@ def to_workflow(request: gr.Request, *args):
             yield value
 
 
-@spaces.GPU(duration=90)
+@spaces.GPU(duration=60)
 def run_workflow_local(*args):
     for value in run_workflow(*args):
         yield value
@@ -268,11 +264,13 @@ with gr.Blocks(
             "local",
             "remote",
         ],
-        # Default to remote because it provides a better first impression for most people
-        # value=("local" if torch.cuda.is_available() else "remote"),
-        value="remote",
+        value=("local" if torch.cuda.is_available() else "remote"),
         label="Compute Mode",
-        info="NOTE: remote mode **does not** use ZeroGPU",
+        info=(
+            "NOTE: remote mode **does not** use ZeroGPU"
+            if torch.cuda.is_available()
+            else "NOTE: local mode requires GPU"
+        ),
         render=False,
     )
 
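Both restored duration arguments are times in seconds: gr.Info keeps its toast on screen for that long before auto-dismissing (assuming a Gradio version that supports duration and title), and spaces.GPU caps how long one call may hold a ZeroGPU allocation, back to 60 s now that the pre-download step is gone. A minimal sketch of the restored pattern; the function body is a stand-in, not the app's actual workflow:

import gradio as gr
import spaces

@spaces.GPU(duration=60)  # ZeroGPU grants a GPU for at most 60 s per call
def run_workflow_local(history):
    # Stand-in body: the real function in app.py drives run_workflow(*args)
    gr.Info(
        "Please wait for the local model to load",
        duration=15,  # toast stays visible for 15 seconds
        title="Model loading...",
    )
    yield history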
 
graph.py CHANGED
@@ -12,6 +12,9 @@ from retriever import BuildRetriever
 from prompts import query_prompt, generate_prompt, generic_tools_template
 from mods.tool_calling_llm import ToolCallingLLM
 
+# Local modules
+from retriever import BuildRetriever
+
 # For tracing (disabled)
 # os.environ["LANGSMITH_TRACING"] = "true"
 # os.environ["LANGSMITH_PROJECT"] = "R-help-chat"
main.py CHANGED
@@ -5,7 +5,6 @@ from langchain_core.output_parsers import StrOutputParser
 from langgraph.checkpoint.memory import MemorySaver
 from langchain_core.messages import ToolMessage
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from huggingface_hub import snapshot_download
 from datetime import datetime
 from dotenv import load_dotenv
 import os
@@ -129,13 +128,6 @@ def ProcessDirectory(path, compute_mode):
         print(f"Chroma: no change for {file_path}")
 
 
-def DownloadChatModel():
-    """
-    Downloads a chat model to the local Hugging Face cache.
-    """
-    snapshot_download(model_id)
-
-
 def GetChatModel(compute_mode):
     """
     Get a chat model.
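For reference, the helper deleted here (along with its huggingface_hub dependency) pre-fetched the chat model into the local Hugging Face cache so the GPU-decorated workflow did not spend its allocation window on the download. A minimal sketch of that pattern with a placeholder repository id; the real value is the model_id defined in main.py:

from huggingface_hub import snapshot_download

# Placeholder id for illustration only; main.py defines the actual model_id.
model_id = "some-org/some-chat-model"

def DownloadChatModel():
    """Download the chat model to the local Hugging Face cache."""
    # snapshot_download pulls every file in the repo so a later
    # from_pretrained(model_id) can load entirely from the cache.
    snapshot_download(model_id)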
mods/tool_calling_llm.py CHANGED
@@ -177,10 +177,6 @@ class ToolCallingLLM(BaseChatModel, ABC):
         # Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
         think_text, post_think = extract_think(response_message.content)
 
-        ## For debugging
-        # print("post_think")
-        # print(post_think)
-
         # Parse output for JSON (support multiple objects separated by commas)
         try:
             parsed_json_results = json.loads(f"[{post_think}]")
requirements.txt CHANGED
@@ -29,5 +29,3 @@ spaces==0.37.1
 #boto3==1.39.14
 # Others
 python-dotenv
-# For snapshot_download
-huggingface-hub==0.33.2
retriever.py CHANGED
@@ -174,10 +174,9 @@ def BuildRetrieverDense(compute_mode: str, top_k=6):
         # Get top k documents
         search_kwargs={"k": top_k},
     )
-    ## Fix for ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
-    ## ... but it breaks retrieval on ZeroGPU
-    ## https://github.com/langchain-ai/langchain/issues/26884
-    # chromadb.api.client.SharedSystemClient.clear_system_cache()
+    ## Release GPU memory
+    ## https://github.com/langchain-ai/langchain/discussions/10668
+    # torch.cuda.empty_cache()
     return retriever
 
 
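The commented-out lines removed here were the "attempt fix" named in the reverted commit: clearing ChromaDB's shared system cache to work around ValueError('Could not connect to tenant default_tenant. Are you sure it exists?'), which in turn broke retrieval on ZeroGPU. A minimal sketch of that workaround under assumed names; build_dense_retriever, the embedding model, and the persisted store are illustrative, not the real BuildRetrieverDense:

from chromadb.api.client import SharedSystemClient
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

def build_dense_retriever(persist_dir: str, top_k: int = 6):
    # Assumed setup: a persisted Chroma collection with a default HF embedding model
    vectorstore = Chroma(
        persist_directory=persist_dir,
        embedding_function=HuggingFaceEmbeddings(),
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": top_k})
    # Workaround from the reverted commit: drop cached Chroma clients so a new
    # tenant/client can be created later in the same process.
    SharedSystemClient.clear_system_cache()
    return retriever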