Shafaq25 committed on
Commit
3195b02
·
verified ·
1 Parent(s): 0eb6f02

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import logging
4
+ import gradio as gr
5
+ from pinecone import Pinecone, ServerlessSpec
6
+ from langchain_pinecone import PineconeVectorStore
7
+ from langchain_community.document_loaders import TextLoader
8
+ from langchain.text_splitter import CharacterTextSplitter
9
+ from langchain.chains import RetrievalQA
10
+ from langchain_community.llms import OpenAI
11
+ from langchain_openai import OpenAIEmbeddings
12
+
# --- Logging ---
# Send INFO-level logs to stdout so they are visible in the Space's console.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# --- Environment Variables ---
# Both API keys are mandatory; fail fast at startup with a clear message.
api_key = os.getenv("PINECONE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

for _name, _value in (("PINECONE_API_KEY", api_key), ("OPENAI_API_KEY", openai_api_key)):
    if not _value:
        raise ValueError(f"Please set the {_name} as an environment variable.")

# Re-export so libraries that read os.environ directly pick up the key.
os.environ["OPENAI_API_KEY"] = openai_api_key
# --- Pinecone Setup ---
index_name = "quickstart"
dimension = 1536  # presumably the OpenAI ada-002 embedding size — confirm against the embedding model
pc = Pinecone(api_key=api_key)

# Create the serverless index on first run only; later runs reuse it.
existing_indexes = {idx["name"] for idx in pc.list_indexes()}
if index_name not in existing_indexes:
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
# --- Load and Process Document ---
# Download the Paul Graham essay on first run and cache it locally;
# subsequent runs reuse the cached copy.
os.makedirs("data/paul_graham", exist_ok=True)
file_path = "data/paul_graham/paul_graham_essay.txt"
if not os.path.exists(file_path):
    import requests  # only needed for the one-time download

    url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt"
    r = requests.get(url, timeout=30)  # bounded wait instead of hanging forever
    r.raise_for_status()  # fail loudly rather than caching an error page as the essay
    # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) can
    # corrupt or reject non-ASCII characters in the essay text.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(r.text)

# Match the write-side encoding so the loader never mis-decodes the cache.
loader = TextLoader(file_path, encoding="utf-8")
documents = loader.load()
# Split into ~1000-character chunks (no overlap) for embedding.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# --- Embedding and Vector Store ---
# Embed each chunk and upsert into the Pinecone index built above.
embeddings = OpenAIEmbeddings()
docsearch = PineconeVectorStore.from_documents(
    texts,
    embeddings,
    index_name=index_name,
)

# --- Query Engine Setup ---
# "stuff" chain: retrieved chunks are concatenated into a single LLM prompt.
llm = OpenAI()
retriever = docsearch.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)
# --- Query Function ---
def ask_question(prompt):
    """Answer *prompt* from the indexed essay via the RetrievalQA chain.

    Returns the answer as a string for the Gradio textbox. Any failure is
    logged server-side (with traceback) and surfaced to the user as a
    friendly error string instead of crashing the UI.
    """
    try:
        response = qa.run(prompt)
        return str(response)
    except Exception as e:
        # Previously the traceback was discarded entirely; record it so
        # failures are debuggable from the Space logs.
        logging.exception("Question answering failed")
        return f"❌ Error: {str(e)}"
# --- Gradio UI ---
# Warm, parchment-toned theme applied via raw CSS.
_CUSTOM_CSS = """body { background-color: #f5f5dc; font-family: 'Georgia', 'Merriweather', serif;}h1, h2, h3 { color: #4e342e;}.gr-box, .gr-column, .gr-group { border-radius: 15px; padding: 20px; background-color: #fffaf0; box-shadow: 2px 4px 14px rgba(0, 0, 0, 0.1); margin-top: 10px;}textarea, input[type="text"] { background-color: #fffaf0; border: 1px solid #d2b48c; color: #4e342e; border-radius: 8px;}button { background-color: #a1887f; color: white; font-weight: bold; border-radius: 8px; transition: background-color 0.3s ease;}button:hover { background-color: #8d6e63;}.gr-button { border-radius: 8px !important;}"""


def _clear_fields():
    """Reset both the question and answer textboxes."""
    return "", ""


with gr.Blocks(css=_CUSTOM_CSS) as demo:
    with gr.Column():
        gr.Markdown("""
        <div style='text-align: center;'>
        <h1>🧠 Paul Graham Essay Q&A</h1>
        <div style='font-size: 1.1em; color: #6d4c41; margin-bottom: 1em;'>
        Explore insights from Paul Graham's essay using semantic search powered by <strong>LangChain</strong> + <strong>Pinecone</strong>.
        </div>
        </div>
        """)
        with gr.Accordion("ℹ️ What is Pinecone Vector Indexing?", open=False):
            gr.Markdown("""**Pinecone** is a vector database that stores document embeddings (numeric representations of meaning). When you ask a question, it's converted into a vector and compared against stored vectors to find the most relevant answers — even if they don't match word-for-word.""")
        gr.Markdown("### 📖 Ask your question below:")
        with gr.Group():
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="E.g., What does Paul Graham say about startups?",
                    label="Your Question",
                    lines=2
                )
            with gr.Row():
                output = gr.Textbox(label="Answer", lines=6)
            with gr.Row():
                submit_btn = gr.Button("🔍 Search Essay")
                clear_btn = gr.Button("🧹 Clear")
        # Wire the buttons to their handlers.
        submit_btn.click(fn=ask_question, inputs=user_input, outputs=output)
        clear_btn.click(fn=_clear_fields, inputs=None, outputs=[user_input, output])

demo.launch()