|
|
|
import logging
import os
import time

import gradio as gr
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
|
|
|
|
|
# Configure root logging so llama_index / pinecone progress messages are visible.
logging.basicConfig(level=logging.INFO)

# Fail fast with an actionable message if the credential is missing, instead
# of the bare KeyError that os.environ[...] would raise.
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    raise RuntimeError(
        "PINECONE_API_KEY environment variable is not set; "
        "export it before running this script."
    )

pc = Pinecone(api_key=api_key)

# Index configuration. 1536 is the dimension of the embeddings llama_index
# produces by default (OpenAI text-embedding-ada-002 family) — keep the two
# in sync if the embed model is ever changed.
index_name = "quickstart"
dimension = 1536

# Drop any index left over from a previous run so we start from a clean slate.
# NOTE: this is destructive by design — the script is a quickstart demo.
if index_name in [idx["name"] for idx in pc.list_indexes()]:
    pc.delete_index(index_name)
|
|
|
|
|
# Create a fresh serverless index. Index creation is asynchronous on
# Pinecone's side, so poll until the index reports ready before taking a
# handle — otherwise the first upsert can fail against a half-initialized
# index.
pc.create_index(
    name=index_name,
    dimension=dimension,
    metric="euclidean",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)
while not pc.describe_index(index_name).status["ready"]:
    time.sleep(1)

pinecone_index = pc.Index(index_name)
|
|
|
|
|
# Fetch the sample essay on first run; later runs reuse the local copy.
os.makedirs("data/paul_graham", exist_ok=True)
file_path = "data/paul_graham/paul_graham_essay.txt"
if not os.path.exists(file_path):
    import shutil
    import urllib.request

    essay_url = (
        "https://raw.githubusercontent.com/run-llama/llama_index/main/"
        "docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
    )
    # urlretrieve is a legacy interface with no timeout; stream via urlopen
    # instead so a hung connection cannot stall startup indefinitely.
    # Download to a temp file and rename atomically so an interrupted
    # download never leaves a partial file that later runs treat as complete.
    tmp_path = file_path + ".part"
    with urllib.request.urlopen(essay_url, timeout=30) as resp, open(tmp_path, "wb") as out:
        shutil.copyfileobj(resp, out)
    os.replace(tmp_path, file_path)

documents = SimpleDirectoryReader("data/paul_graham/").load_data()
|
|
|
|
|
# Wire llama_index's storage layer to the remote Pinecone index so embeddings
# are persisted there rather than in the default in-memory store.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Embeds and upserts every loaded document — the embedding-API and Pinecone
# network calls happen here, so this line dominates startup time.
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Default query engine over the index; retriever/LLM settings are whatever
# llama_index's global defaults resolve to (nothing is overridden here).
query_engine = index.as_query_engine()
|
|
|
|
|
def query_doc(prompt, engine=None):
    """Answer ``prompt`` against the indexed essay.

    Parameters
    ----------
    prompt : str
        Natural-language question entered in the Gradio UI.
    engine : optional
        Query-engine override with a ``.query(str)`` method. Defaults to the
        module-level ``query_engine``; injectable for testing.

    Returns
    -------
    str
        The engine's answer, or an ``"Error: ..."`` message on failure so the
        UI stays responsive instead of crashing.
    """
    engine = query_engine if engine is None else engine
    try:
        response = engine.query(prompt)
        return str(response)
    except Exception as e:
        # Record the full traceback in the server log (logging is configured
        # at module top) while still returning a friendly message to the UI.
        logging.exception("query failed")
        return f"Error: {str(e)}"
|
|
|
|
|
# Build the web UI. launch() is guarded so that importing this module (e.g.
# from a test or another script) does not start a web server; running the
# file directly behaves exactly as before.
demo = gr.Interface(
    fn=query_doc,
    inputs=gr.Textbox(label="Ask a question about the document"),
    outputs=gr.Textbox(label="Answer"),
    title="Paul Graham Document QA (LlamaIndex + Pinecone)",
    description="Ask questions based on the indexed Paul Graham essay. Powered by LlamaIndex & Pinecone.",
)

if __name__ == "__main__":
    demo.launch()
|
|