broadfield-dev committed
Commit f28324d · verified · Parent: 17dfbee

Update database.py

Files changed (1):
  1. database.py +5 -4
database.py CHANGED
@@ -11,8 +11,9 @@ import torch
 # User-configurable variables
 DB_NAME = "python_programs" # ChromaDB collection name
 HF_DATASET_NAME = "python_program_vectors" # Hugging Face Dataset name
-HF_TOKEN = "HF_KEY" # Replace with your Hugging Face API token
+HF_KEY = "YOUR_HUGGINGFACE_TOKEN" # Replace with your Hugging Face API token
 PERSIST_DIR = "./chroma_data" # Directory for persistent storage (optional)
+USE_GPU = False # Default to CPU, set to True for GPU if available
 
 def init_chromadb(persist_dir=PERSIST_DIR):
     """Initialize ChromaDB client, optionally with persistent storage."""
@@ -161,7 +162,7 @@ def generate_description_tokens(sequence, vectors):
         tokens.append(f"span:{vec[3]:.2f}")
     return tokens
 
-def generate_semantic_vector(description, use_gpu=False):
+def generate_semantic_vector(description, use_gpu=USE_GPU):
     """Generate a semantic vector for a textual description using CodeBERT, with CPU/GPU option."""
     # Load CodeBERT model and tokenizer
     model_name = "microsoft/codebert-base"
@@ -186,7 +187,7 @@ def generate_semantic_vector(description, use_gpu=False):
     vector = vector[:6]
     return vector
 
-def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_TOKEN):
+def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
     """Save ChromaDB data to Hugging Face Dataset."""
     client = init_chromadb()
     collection = create_collection(client)
@@ -207,7 +208,7 @@ def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_TOKEN):
     dataset.push_to_hub(dataset_name, token=token)
     print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
 
-def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=HF_TOKEN):
+def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
     """Load ChromaDB data from Hugging Face Dataset, handle empty dataset."""
     try:
         dataset = load_dataset(dataset_name, split="train", token=token)
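
For context, a minimal sketch of how the new USE_GPU default might flow through generate_semantic_vector. The hunks elide most of the function body, so the device-selection and pooling logic below are assumptions, inferred only from the use_gpu parameter, the "microsoft/codebert-base" model name, and the visible vector = vector[:6] truncation:

# Sketch only: the committed body is elided in the diff above; the device
# handling and mean-pooling here are assumptions, not the actual code.
import torch
from transformers import AutoModel, AutoTokenizer

USE_GPU = False  # mirrors the module-level default added in this commit

def generate_semantic_vector(description, use_gpu=USE_GPU):
    """Generate a semantic vector for a textual description using CodeBERT."""
    model_name = "microsoft/codebert-base"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Fall back to CPU when CUDA is unavailable, even if use_gpu=True.
    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    model.to(device)

    inputs = tokenizer(description, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean-pool the last hidden state, then truncate to six dimensions,
    # matching the visible vector = vector[:6] line in the diff.
    vector = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().tolist()
    return vector[:6]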
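
And a hedged usage sketch of the renamed token constant, showing that the same HF_KEY now feeds both the push and the load. The call shapes assume the functions behave as their docstrings describe; the return value of load_chromadb_from_hf is not visible in the diff, so none is captured here:

# Hypothetical round-trip using the renamed constant. HF_KEY must hold a
# real Hugging Face token with write access for push_to_hub to succeed.
HF_KEY = "YOUR_HUGGINGFACE_TOKEN"
HF_DATASET_NAME = "python_program_vectors"

save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY)    # export collection to the Hub
load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY)  # rebuild it locally

Reading the token from the environment (for example, os.environ.get("HF_KEY")) rather than keeping a placeholder string in the module would avoid ever committing a real token by accident.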