File size: 1,285 Bytes
f1ba123
 
 
 
 
 
 
 
 
 
 
f7913d7
f1ba123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import uuid
import pandas as pd
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer

# === Step 1: Ensure Qdrant directory exists ===
# `exist_ok=True` makes the call idempotent without a separate existence
# check, so there is no window between "check" and "create" in which another
# process could create the directory. It also raises immediately if
# "qdrant_data" already exists as something other than a directory, instead
# of silently skipping creation.
os.makedirs("qdrant_data", exist_ok=True)

# === Step 2: Load dataset ===
# The CSV is resolved relative to the current working directory and must
# provide a "problem" column, which is the text embedded in Step 3.
dataset_path = "math_dataset (2).csv"
data = pd.read_csv(dataset_path)

# === Step 3: Encode questions ===
# NOTE(review): the E5 model family is documented as expecting a "query: " /
# "passage: " prefix on each input; none is added here — confirm this matches
# whatever the query-side code does before changing it.
model_name = "intfloat/e5-large"
embedding_model = SentenceTransformer(model_name)

# Every value of the "problem" column is embedded in a single encode call;
# the result is kept in `vectors` for the collection setup and upload steps.
questions = data["problem"].tolist()
vectors = embedding_model.encode(questions, show_progress_bar=True)

# === Step 4: Initialize local Qdrant client ===
# The client is opened against the on-disk "qdrant_data" directory (no server).
client = QdrantClient(path="qdrant_data")

# === Step 5: Create collection (always start from an empty one) ===
collection_name = "math_problems"

# Drop any previous copy of the collection, then create it anew, so that
# re-running the script never mixes old and new points.
# NOTE(review): this explicit delete + create pair replaces the
# `recreate_collection` helper, which qdrant-client flags as deprecated —
# confirm against the installed client version.
client.delete_collection(collection_name=collection_name)
client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        # Vector size is read from the second axis of the encoded output
        # (assumes `vectors` is a 2-D array: one row per question).
        size=vectors.shape[1],
        distance=models.Distance.COSINE,
    ),
)

# === Step 6: Prepare payload and upload with UUIDs ===
# One payload dict per CSV row, keyed by column name.
payload = data.to_dict(orient="records")

# One freshly generated random UUID string per vector is used as its point ID.
ids = [str(uuid.uuid4()) for _ in vectors]

# Vectors, payloads and IDs are passed as parallel sequences.
client.upload_collection(
    collection_name=collection_name,
    ids=ids,
    payload=payload,
    vectors=vectors,
)

print("✅ Qdrant vector store created and populated successfully in `qdrant_data/`.")