Spaces:
Runtime error
Runtime error
File size: 2,416 Bytes
01752fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import os
import random
from src.helper import load_pdf, text_split, download_hugging_face_embeddings
from langchain.vectorstores import Pinecone
import pinecone
from dotenv import load_dotenv
# Prepare the document inputs using the project helpers imported from
# src.helper.
# NOTE(review): the helpers' behaviour is not visible in this file; the
# descriptions below are inferred from their names -- confirm against
# src/helper.py. In this script the results are only referenced by the
# commented-out `from_texts` call at the end of the file.

# Presumably loads the PDF documents found under the data/ directory.
extracted_data = load_pdf("data/")
# Presumably splits the loaded documents into smaller text chunks; the
# commented-out code at the end of the file reads `.page_content` from each.
text_chunks = text_split(extracted_data)
# Presumably returns a Hugging Face embedding model. The index below is
# created with dimension=384, so the model is assumed to emit 384-dimensional
# vectors -- TODO confirm.
embeddings = download_hugging_face_embeddings()
# --- Pinecone client configuration ---------------------------------------
# Load variables from a local .env file (if one exists) into the process
# environment before reading any of them. `load_dotenv` is imported at the
# top of the file but was never called, so a key stored in .env was invisible
# to the os.environ lookups below.
load_dotenv()

if not os.environ.get("PINECONE_API_KEY"):
    # Fallback when no API key is configured. NOTE(review): this helper
    # presumably performs an interactive notebook login that sets
    # PINECONE_API_KEY -- confirm it is available in the deployment target.
    from pinecone_notebooks import authenticate

    authenticate()

api_key = os.environ.get("PINECONE_API_KEY")

# Import the SDK client under an alias so the name `Pinecone` keeps referring
# to the LangChain vector store imported at the top of the file (the class
# that provides `from_texts`), instead of being silently rebound here.
from pinecone import Pinecone as PineconeClient
from pinecone import ServerlessSpec

pc = PineconeClient(api_key=api_key)

# Serverless deployment target; both values can be overridden through the
# environment and otherwise fall back to AWS us-east-1.
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)

# Name of the index that the rest of the script creates and queries.
index_name = "medical-chatbot"
import time
import random
# Create the index only when it is not already listed for this project.
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    pc.create_index(name=index_name, dimension=384, metric="cosine", spec=spec)
    # Poll once per second until the new index reports itself as ready.
    while True:
        if pc.describe_index(index_name).status['ready']:
            break
        time.sleep(1)

# Handle used for every upsert/query below; the short pause mirrors the
# original script's one-second wait after obtaining the handle.
index = pc.Index(index_name)
time.sleep(1)
def generate_random_vector(dim):
    """Return a list of ``dim`` random floats drawn uniformly from [-1, 1].

    The script calls this with ``dim=384`` to produce placeholder vectors
    for the upsert and query calls. Values come from the module-level
    ``random`` generator, one ``random.uniform`` call per component.
    """
    lower_bound, upper_bound = -1, 1
    return [random.uniform(lower_bound, upper_bound) for _ in range(dim)]
# Write three random 384-dimensional vectors into each of two namespaces.
# The same three ids are used under both "ns1" and "ns2".
vector_ids = ("vec1", "vec2", "vec3")

ns1_vectors = [
    {"id": vector_id, "values": generate_random_vector(384)}
    for vector_id in vector_ids
]
upsert1 = index.upsert(vectors=ns1_vectors, namespace="ns1")
print(upsert1)

ns2_vectors = [
    {"id": vector_id, "values": generate_random_vector(384)}
    for vector_id in vector_ids
]
upsert2 = index.upsert(vectors=ns2_vectors, namespace="ns2")
print(upsert2)

# Print the index statistics after both writes.
index_stats = index.describe_index_stats()
print(index_stats)
# Query each namespace with its own freshly generated random vector, asking
# for the top three matches with their stored values included.
query_options = {"top_k": 3, "include_values": True}

query_vector_ns1 = generate_random_vector(384)
query_results1 = index.query(
    namespace="ns1", vector=query_vector_ns1, **query_options
)
print(query_results1)

query_vector_ns2 = generate_random_vector(384)
query_results2 = index.query(
    namespace="ns2", vector=query_vector_ns2, **query_options
)
print(query_results2)
# docsearch=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name) |