broadfield-dev committed on
Commit
1c2a481
·
verified ·
1 Parent(s): b06c06e

Update database.py

Browse files
Files changed (1) hide show
  1. database.py +6 -6
database.py CHANGED
@@ -7,14 +7,14 @@ import numpy as np
7
  from datasets import Dataset, load_dataset
8
  from transformers import AutoTokenizer, AutoModel
9
  import torch
10
- import dotenv
11
- dotenv.load()
12
 
 
 
13
 
14
- # User-configurable variables
15
  DB_NAME = "python_programs" # ChromaDB collection name
16
  HF_DATASET_NAME = "python_program_vectors" # Hugging Face Dataset name
17
- HF_KEY = os.getenv("HF_KEY") # Replace with your Hugging Face API token
18
  PERSIST_DIR = "./chroma_data" # Directory for persistent storage (optional)
19
  USE_GPU = False # Default to CPU, set to True for GPU if available
20
 
@@ -190,7 +190,7 @@ def generate_semantic_vector(description, use_gpu=USE_GPU):
190
  vector = vector[:6]
191
  return vector
192
 
193
- def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
194
  """Save ChromaDB data to Hugging Face Dataset."""
195
  client = init_chromadb()
196
  collection = create_collection(client)
@@ -211,7 +211,7 @@ def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
211
  dataset.push_to_hub(dataset_name, token=token)
212
  print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
213
 
214
- def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
215
  """Load ChromaDB data from Hugging Face Dataset, handle empty dataset."""
216
  try:
217
  dataset = load_dataset(dataset_name, split="train", token=token)
 
7
  from datasets import Dataset, load_dataset
8
  from transformers import AutoTokenizer, AutoModel
9
  import torch
10
+ from dotenv import load_dotenv
 
11
 
12
+ # Load environment variables
13
+ load_dotenv()
14
 
15
+ # User-configurable variables (no HF_KEY hardcoded here)
16
  DB_NAME = "python_programs" # ChromaDB collection name
17
  HF_DATASET_NAME = "python_program_vectors" # Hugging Face Dataset name
 
18
  PERSIST_DIR = "./chroma_data" # Directory for persistent storage (optional)
19
  USE_GPU = False # Default to CPU, set to True for GPU if available
20
 
 
190
  vector = vector[:6]
191
  return vector
192
 
193
+ def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=os.getenv("HF_KEY")):
194
  """Save ChromaDB data to Hugging Face Dataset."""
195
  client = init_chromadb()
196
  collection = create_collection(client)
 
211
  dataset.push_to_hub(dataset_name, token=token)
212
  print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
213
 
214
+ def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=os.getenv("HF_KEY")):
215
  """Load ChromaDB data from Hugging Face Dataset, handle empty dataset."""
216
  try:
217
  dataset = load_dataset(dataset_name, split="train", token=token)