Spaces:
Running
Running
Update database.py
Browse files — database.py: +5 −4
database.py
CHANGED
@@ -11,8 +11,9 @@ import torch
|
|
11 |
# User-configurable variables
|
12 |
DB_NAME = "python_programs" # ChromaDB collection name
|
13 |
HF_DATASET_NAME = "python_program_vectors" # Hugging Face Dataset name
|
14 |
-
|
15 |
PERSIST_DIR = "./chroma_data" # Directory for persistent storage (optional)
|
|
|
16 |
|
17 |
def init_chromadb(persist_dir=PERSIST_DIR):
|
18 |
"""Initialize ChromaDB client, optionally with persistent storage."""
|
@@ -161,7 +162,7 @@ def generate_description_tokens(sequence, vectors):
|
|
161 |
tokens.append(f"span:{vec[3]:.2f}")
|
162 |
return tokens
|
163 |
|
164 |
-
def generate_semantic_vector(description, use_gpu=False):
|
165 |
"""Generate a semantic vector for a textual description using CodeBERT, with CPU/GPU option."""
|
166 |
# Load CodeBERT model and tokenizer
|
167 |
model_name = "microsoft/codebert-base"
|
@@ -186,7 +187,7 @@ def generate_semantic_vector(description, use_gpu=False):
|
|
186 |
vector = vector[:6]
|
187 |
return vector
|
188 |
|
189 |
-
def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_TOKEN):
|
190 |
"""Save ChromaDB data to Hugging Face Dataset."""
|
191 |
client = init_chromadb()
|
192 |
collection = create_collection(client)
|
@@ -207,7 +208,7 @@ def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_TOKEN):
|
|
207 |
dataset.push_to_hub(dataset_name, token=token)
|
208 |
print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
|
209 |
|
210 |
-
def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=HF_TOKEN):
|
211 |
"""Load ChromaDB data from Hugging Face Dataset, handle empty dataset."""
|
212 |
try:
|
213 |
dataset = load_dataset(dataset_name, split="train", token=token)
|
|
|
11 |
# User-configurable variables
|
12 |
DB_NAME = "python_programs" # ChromaDB collection name
|
13 |
HF_DATASET_NAME = "python_program_vectors" # Hugging Face Dataset name
|
14 |
+
HF_KEY = "YOUR_HUGGINGFACE_TOKEN" # Replace with your Hugging Face API token
|
15 |
PERSIST_DIR = "./chroma_data" # Directory for persistent storage (optional)
|
16 |
+
USE_GPU = False # Default to CPU, set to True for GPU if available
|
17 |
|
18 |
def init_chromadb(persist_dir=PERSIST_DIR):
|
19 |
"""Initialize ChromaDB client, optionally with persistent storage."""
|
|
|
162 |
tokens.append(f"span:{vec[3]:.2f}")
|
163 |
return tokens
|
164 |
|
165 |
+
def generate_semantic_vector(description, use_gpu=USE_GPU):
|
166 |
"""Generate a semantic vector for a textual description using CodeBERT, with CPU/GPU option."""
|
167 |
# Load CodeBERT model and tokenizer
|
168 |
model_name = "microsoft/codebert-base"
|
|
|
187 |
vector = vector[:6]
|
188 |
return vector
|
189 |
|
190 |
+
def save_chromadb_to_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
|
191 |
"""Save ChromaDB data to Hugging Face Dataset."""
|
192 |
client = init_chromadb()
|
193 |
collection = create_collection(client)
|
|
|
208 |
dataset.push_to_hub(dataset_name, token=token)
|
209 |
print(f"Dataset pushed to Hugging Face Hub as {dataset_name}")
|
210 |
|
211 |
+
def load_chromadb_from_hf(dataset_name=HF_DATASET_NAME, token=HF_KEY):
|
212 |
"""Load ChromaDB data from Hugging Face Dataset, handle empty dataset."""
|
213 |
try:
|
214 |
dataset = load_dataset(dataset_name, split="train", token=token)
|