File size: 2,416 Bytes
01752fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
import random
from src.helper import load_pdf, text_split, download_hugging_face_embeddings
from langchain.vectorstores import Pinecone
import pinecone
from dotenv import load_dotenv

# Ingestion inputs, produced by helpers imported from src.helper. The helpers'
# implementations are not visible in this file, so the per-line notes below are
# inferred from their names and should be confirmed against src/helper.py.
# NOTE(review): in the visible part of this script, text_chunks and embeddings
# are only referenced by the commented-out `Pinecone.from_texts(...)` line.
extracted_data = load_pdf("data/")  # presumably loads the PDF documents under data/ — confirm
text_chunks = text_split(extracted_data)  # presumably splits the documents into smaller chunks — confirm
embeddings = download_hugging_face_embeddings()  # presumably returns a HuggingFace embedding model — confirm

# ---------------------------------------------------------------------------
# Pinecone client and index setup
# ---------------------------------------------------------------------------
import time

# Imported under an alias: a bare `from pinecone import Pinecone` would rebind
# the `Pinecone` name that the top of this file imports from
# langchain.vectorstores, leaving the langchain class unreachable afterwards.
from pinecone import Pinecone as PineconeClient
from pinecone import ServerlessSpec

# Load variables from a .env file into the process environment. Without this
# call a PINECONE_API_KEY kept only in .env is not visible to the lookups below.
load_dotenv()

if not os.environ.get("PINECONE_API_KEY"):
    # No key configured: fall back to the notebook authentication helper.
    # NOTE(review): presumably authenticate() stores the key in
    # os.environ["PINECONE_API_KEY"] — confirm against pinecone_notebooks.
    from pinecone_notebooks import authenticate
    authenticate()

api_key = os.environ.get("PINECONE_API_KEY")
pc = PineconeClient(api_key=api_key)

# Serverless deployment target. Both values can be overridden through
# environment variables and fall back to 'aws' / 'us-east-1' when unset or empty.
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'

spec = ServerlessSpec(cloud=cloud, region=region)

index_name = "medical-chatbot"

# Create the index only when it does not exist yet; otherwise reuse it.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # NOTE(review): 384 presumably matches the output size of the model
        # returned by download_hugging_face_embeddings() — confirm.
        dimension=384,
        metric="cosine",
        spec=spec,
    )
    # Poll once per second until the new index reports that it is ready.
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

index = pc.Index(index_name)
# One-second pause after obtaining the index handle, before it is first used.
time.sleep(1)

# Random test-vector factory used by the upsert/query calls in this script.
def generate_random_vector(dim):
    """Return a list of ``dim`` floats, each drawn with ``random.uniform(-1, 1)``.

    Args:
        dim: Number of components to generate.

    Returns:
        A new list holding ``dim`` independently drawn floats.
    """
    components = []
    for _ in range(dim):
        components.append(random.uniform(-1, 1))
    return components

def _three_random_records():
    """Build upsert records for ids vec1, vec2 and vec3 with fresh random 384-d values."""
    records = []
    for record_id in ("vec1", "vec2", "vec3"):
        records.append({"id": record_id, "values": generate_random_vector(384)})
    return records


def _top3_with_values(namespace, vector):
    """Query ``namespace`` for the 3 best matches to ``vector``, values included."""
    return index.query(
        namespace=namespace,
        vector=vector,
        top_k=3,
        include_values=True
    )


# Upsert the same three ids into two different namespaces, printing each
# response as it comes back.
upsert1 = index.upsert(vectors=_three_random_records(), namespace="ns1")
print(upsert1)

upsert2 = index.upsert(vectors=_three_random_records(), namespace="ns2")
print(upsert2)

# Print the index statistics after both upserts.
print(index.describe_index_stats())

# Query each namespace with its own freshly generated random vector.
query_vector_ns1 = generate_random_vector(384)
query_results1 = _top3_with_values("ns1", query_vector_ns1)
print(query_results1)

query_vector_ns2 = generate_random_vector(384)
query_results2 = _top3_with_values("ns2", query_vector_ns2)
print(query_results2)

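# NOTE: `Pinecone` in the disabled line below must be the langchain vector store
# class (langchain.vectorstores.Pinecone), not the client class from the
# `pinecone` package.
# docsearch=Pinecone.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)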