broadfield-dev committed on
Commit
64ba224
·
verified ·
1 Parent(s): 2960c28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -4
app.py CHANGED
@@ -5,8 +5,9 @@ import os
5
  import json
6
  import io
7
  import subprocess # To call process_hf_dataset.py
8
- from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME, create_collection
9
  import logging
 
10
 
11
  # Set up logging
12
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -127,6 +128,25 @@ def index():
127
  if collection is None or not hasattr(collection, 'add'):
128
  raise ValueError("ChromaDB collection creation failed")
129
  logger.info("Verified ChromaDB collection is valid")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  # Process dataset
132
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
@@ -144,7 +164,7 @@ def index():
144
  reconstructed_code=None,
145
  code_input=None,
146
  query_results=None,
147
- message="Hugging Face dataset processed and stored successfully with fresh database."
148
  )
149
  except subprocess.CalledProcessError as e:
150
  logger.error(f"Error processing Hugging Face dataset: {e.stderr}")
@@ -163,6 +183,9 @@ def index():
163
  if collection is None or not hasattr(collection, 'add'):
164
  raise ValueError("ChromaDB collection access failed")
165
  logger.info("Verified ChromaDB collection is valid")
 
 
 
166
 
167
  # Process dataset
168
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
@@ -189,7 +212,7 @@ def index():
189
  logger.error(f"Unexpected error loading Hugging Face dataset: {e}")
190
  return f"Unexpected error loading Hugging Face dataset: {e}", 500
191
  elif 'reset_db' in request.form:
192
- # Reset ChromaDB collection (no repopulation with samples)
193
  try:
194
  client = init_chromadb()
195
  try:
@@ -206,6 +229,23 @@ def index():
206
  # Verify collection is empty
207
  count = collection.count()
208
  logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  return render_template(
210
  'results_partial.html',
211
  parts=None,
@@ -213,7 +253,7 @@ def index():
213
  reconstructed_code=None,
214
  code_input=None,
215
  query_results=None,
216
- message="Database reset successfully."
217
  )
218
  except Exception as e:
219
  logger.error(f"Error resetting database: {e}")
 
5
  import json
6
  import io
7
  import subprocess # To call process_hf_dataset.py
8
+ from database import init_chromadb, store_program, query_programs, load_chromadb_from_hf, DB_NAME, create_collection, save_chromadb_to_hf
9
  import logging
10
+ from datasets import Dataset
11
 
12
  # Set up logging
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
128
  if collection is None or not hasattr(collection, 'add'):
129
  raise ValueError("ChromaDB collection creation failed")
130
  logger.info("Verified ChromaDB collection is valid")
131
+ # Verify collection is empty
132
+ count = collection.count()
133
+ logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
134
+
135
+ # Reset Hugging Face dataset (replace with empty dataset)
136
+ try:
137
+ empty_data = {
138
+ "code": [],
139
+ "sequence": [],
140
+ "vectors": [],
141
+ "description_tokens": [],
142
+ "program_vectors": []
143
+ }
144
+ empty_dataset = Dataset.from_dict(empty_data)
145
+ empty_dataset.push_to_hub(HF_DATASET_NAME, token=os.getenv("HF_KEY"))
146
+ logger.info(f"Replaced Hugging Face dataset {HF_DATASET_NAME} with empty dataset")
147
+ except Exception as e:
148
+ logger.error(f"Error replacing Hugging Face dataset: {e}")
149
+ raise
150
 
151
  # Process dataset
152
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
 
164
  reconstructed_code=None,
165
  code_input=None,
166
  query_results=None,
167
+ message="Hugging Face dataset processed and stored successfully with fresh database and empty dataset."
168
  )
169
  except subprocess.CalledProcessError as e:
170
  logger.error(f"Error processing Hugging Face dataset: {e.stderr}")
 
183
  if collection is None or not hasattr(collection, 'add'):
184
  raise ValueError("ChromaDB collection access failed")
185
  logger.info("Verified ChromaDB collection is valid")
186
+ # Verify collection state
187
+ count = collection.count()
188
+ logger.info(f"ChromaDB contains {count} entries before loading")
189
 
190
  # Process dataset
191
  result = subprocess.run(['python', 'process_hf_dataset.py'], check=True, capture_output=True, text=True, cwd=os.path.dirname(__file__))
 
212
  logger.error(f"Unexpected error loading Hugging Face dataset: {e}")
213
  return f"Unexpected error loading Hugging Face dataset: {e}", 500
214
  elif 'reset_db' in request.form:
215
+ # Reset ChromaDB collection and Hugging Face dataset (no repopulation with samples)
216
  try:
217
  client = init_chromadb()
218
  try:
 
229
  # Verify collection is empty
230
  count = collection.count()
231
  logger.info(f"ChromaDB now contains {count} entries after reset (should be 0)")
232
+
233
+ # Reset Hugging Face dataset (replace with empty dataset)
234
+ try:
235
+ empty_data = {
236
+ "code": [],
237
+ "sequence": [],
238
+ "vectors": [],
239
+ "description_tokens": [],
240
+ "program_vectors": []
241
+ }
242
+ empty_dataset = Dataset.from_dict(empty_data)
243
+ empty_dataset.push_to_hub(HF_DATASET_NAME, token=os.getenv("HF_KEY"))
244
+ logger.info(f"Replaced Hugging Face dataset {HF_DATASET_NAME} with empty dataset")
245
+ except Exception as e:
246
+ logger.error(f"Error replacing Hugging Face dataset: {e}")
247
+ raise
248
+
249
  return render_template(
250
  'results_partial.html',
251
  parts=None,
 
253
  reconstructed_code=None,
254
  code_input=None,
255
  query_results=None,
256
+ message="Database and Hugging Face dataset reset successfully."
257
  )
258
  except Exception as e:
259
  logger.error(f"Error resetting database: {e}")