# Spaces: Runtime error
"""Index a PDF in Pinecone and run a sample similarity query against it.

Steps performed at import/run time:

1. Load settings from a ``.env`` file.
2. Load the PDF and split it into chunks of at most 1000 characters.
3. Create the ``linuxtips`` serverless index when it does not exist yet.
4. Embed the chunks with ``OpenAIEmbeddings`` and upload them.
5. Run a test query and print the best-matching chunk.
"""
import os

from dotenv import find_dotenv, load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Pinecone
from langchain_openai import OpenAIEmbeddings
from pinecone import Pinecone as PineconeClient, ServerlessSpec

# override=True: values from the .env file replace variables that are
# already set in the process environment.
load_dotenv(find_dotenv(), override=True)

PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
PINECONE_ENV = os.environ.get('PINECONE_ENV')  # read but not used below

# Fail fast with a clear message instead of failing later, after the PDF
# has already been parsed.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; add it to the environment or a .env file."
    )

PDF_PATH = "docs/M92TB4_2023-24_online.pdf"

embeddings = OpenAIEmbeddings()

loader = PyPDFLoader(PDF_PATH)
data = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(data)

pinecone = PineconeClient(api_key=PINECONE_API_KEY)

index_name = "linuxtips"
if index_name not in pinecone.list_indexes().names():
    pinecone.create_index(
        name=index_name,
        # NOTE(review): presumably the vector size of the embedding model in
        # use; confirm if the model is ever changed.
        dimension=1536,
        metric='euclidean',
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Direct handle to the index; nothing below uses it, but the module-level
# name is kept for code that imports it.
index = pinecone.Index(index_name)

# Only the chunk text is uploaded; the loader's per-page metadata is not.
chunk_texts = [t.page_content for t in texts]

# Deterministic ids (PDF name + chunk position) so that re-running the script
# overwrites the same vectors instead of adding a duplicate copy of the whole
# document under fresh ids on every run. If a later run produces fewer
# chunks, vectors with the higher positions from the earlier run remain.
pdf_stem = os.path.splitext(os.path.basename(PDF_PATH))[0]
chunk_ids = [f"{pdf_stem}-{position}" for position in range(len(chunk_texts))]

docsearch = Pinecone.from_texts(
    chunk_texts,
    embeddings,
    ids=chunk_ids,
    index_name=index_name,
)

# Test
query = "Assistant, please tell me what are the main functions of an autarchy department"
docs = docsearch.similarity_search(query)
if docs:
    print(docs[0].page_content)
else:
    # An empty result list would otherwise raise IndexError on docs[0].
    print("No matching documents found.")