import openai
import pinecone
from llama_index import StorageContext, VectorStoreIndex, download_loader
from llama_index.vector_stores import PineconeVectorStore

from environments import PINECONE_API_KEY, PINECONE_INDEX, OPENAI_API_KEY

openai.api_key = OPENAI_API_KEY

print('Start Loading Data ...')

# Fetch the UnstructuredURLLoader from LlamaHub and scrape the centre's pages.
UnstructuredURLLoader = download_loader("UnstructuredURLLoader")
urls = [
    'https://tmdhc.org.hk/tc/intro',
    'https://tmdhc.org.hk/tc/service-intro',
    'https://tmdhc.org.hk/tc/service-flow',
    'https://tmdhc.org.hk/tc/contact-us',
    'https://tmdhc.org.hk/tc/service-charge',
]
loader = UnstructuredURLLoader(
    urls=urls,
    continue_on_failure=True,
    headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537",
    },
)
documents = loader.load()

# Connect to Pinecone and build the vector index, persisting embeddings there.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment='gcp-starter',
)
pinecone_index = pinecone.Index(PINECONE_INDEX)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

print('Done!')
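
# --- Optional: a quick sanity-check query against the new index ---
# A minimal sketch, not part of the original script: it assumes the same
# llama_index version as above and that indexing completed successfully.
# The question text is only an illustrative placeholder.
query_engine = index.as_query_engine()
response = query_engine.query("What services does the centre provide?")
print(response)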