neo4jchat / app33.py
Akshayram1's picture
Rename app.py to app33.py
7316c25 verified
import os
import tempfile
from dotenv import load_dotenv
import streamlit as st
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Neo4jVector
from langchain.graphs import Neo4jGraph
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain.chains.graph_qa.cypher import GraphCypherQAChain
from neo4j import GraphDatabase
# Llama-Index imports
from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex, Settings
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core import StorageContext
from llama_index.embeddings.openai import OpenAIEmbedding # Fix import path
from llama_index.llms.langchain import LangChainLLM
def main():
st.set_page_config(
layout="wide",
page_title="MayaJal",
page_icon=":graph:"
)
# Debug statement
st.write("Starting the app...")
st.sidebar.image('logo.png', use_column_width=True)
with st.sidebar.expander("Expand Me"):
st.markdown("""
This application allows you to upload a PDF file, extract its content into a Neo4j graph database, and perform queries using natural language.
It leverages LangChain and OpenAI's GPT models to generate Cypher queries that interact with the Neo4j database in real-time.
""")
st.title("Mayajal: Realtime GraphRAG App")
load_dotenv()
# Debug statement
st.write("Loaded environment variables.")
# Set OpenAI API key
if 'OPENAI_API_KEY' not in st.session_state:
st.sidebar.subheader("OpenAI API Key")
openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key:", type='password')
if openai_api_key:
os.environ['OPENAI_API_KEY'] = openai_api_key
st.session_state['OPENAI_API_KEY'] = openai_api_key
st.sidebar.success("OpenAI API Key set successfully.")
embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(model_name="gpt-4o") # Use model that supports function calling
st.session_state['embeddings'] = embeddings
st.session_state['llm'] = llm
# Debug statement
st.write("OpenAI API Key set and models initialized.")
else:
# Debug statement
st.write("OpenAI API Key already set.")
embeddings = st.session_state['embeddings']
llm = st.session_state['llm']
# Initialize variables
neo4j_url = None
neo4j_username = None
neo4j_password = None
graph = None
# Set Neo4j connection details
if 'neo4j_connected' not in st.session_state:
st.sidebar.subheader("Connect to Neo4j Database")
neo4j_url = st.sidebar.text_input("Neo4j URL:", value="neo4j+s://<your-neo4j-url>")
neo4j_username = st.sidebar.text_input("Neo4j Username:", value="neo4j")
neo4j_password = st.sidebar.text_input("Neo4j Password:", type='password')
connect_button = st.sidebar.button("Connect")
if connect_button and neo4j_password:
try:
graph = Neo4jGraph(
url=neo4j_url,
username=neo4j_username,
password=neo4j_password
)
st.session_state['graph'] = graph
st.session_state['neo4j_connected'] = True
# Store connection parameters for later use
st.session_state['neo4j_url'] = neo4j_url
st.session_state['neo4j_username'] = neo4j_username
st.session_state['neo4j_password'] = neo4j_password
st.sidebar.success("Connected to Neo4j database.")
except Exception as e:
st.error(f"Failed to connect to Neo4j: {e}")
else:
graph = st.session_state['graph']
neo4j_url = st.session_state['neo4j_url']
neo4j_username = st.session_state['neo4j_username']
neo4j_password = st.session_state['neo4j_password']
# Ensure that the Neo4j connection is established before proceeding
if graph is not None:
# Debug statement
st.write("Neo4j connection established.")
# File uploader
uploaded_file = st.file_uploader("Please select a PDF file.", type="pdf")
if uploaded_file is not None and 'qa' not in st.session_state:
with st.spinner("Processing the PDF..."):
# Save uploaded file to temporary file
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
tmp_file.write(uploaded_file.read())
tmp_file_path = tmp_file.name
# Debug statement
st.write("PDF file uploaded and saved to temporary file.")
# Process document using Llama-Index
index = process_document(tmp_file_path, graph, st.session_state['OPENAI_API_KEY'])
# Store the index in session state
st.session_state['index'] = index
# Use the stored connection parameters
index = Neo4jVector.from_existing_graph(
embedding=embeddings,
url=neo4j_url,
username=neo4j_username,
password=neo4j_password,
database="neo4j",
node_label="Patient", # Adjust node_label as needed
text_node_properties=["id", "text"],
embedding_node_property="embedding",
index_name="vector_index",
keyword_index_name="entity_index",
search_type="hybrid"
)
st.success(f"{uploaded_file.name} preparation is complete.")
# Retrieve the graph schema
schema = graph.get_schema
# Set up the QA chain
template = """
Task: Generate a Cypher statement to query the graph database.
Instructions:
Use only relationship types and properties provided in schema.
Do not use other relationship types or properties that are not provided.
schema:
{schema}
Note: Do not include explanations or apologies in your answers.
Do not answer questions that ask anything other than creating Cypher statements.
Do not include any text other than generated Cypher statements.
Question: {question}"""
question_prompt = PromptTemplate(
template=template,
input_variables=["schema", "question"]
)
qa = GraphCypherQAChain.from_llm(
llm=llm,
graph=graph,
cypher_prompt=question_prompt,
verbose=True,
allow_dangerous_requests=True
)
st.session_state['qa'] = qa
else:
# Debug statement
st.write("Neo4j connection not established.")
st.warning("Please connect to the Neo4j database before you can upload a PDF.")
if 'qa' in st.session_state:
st.subheader("Ask a Question")
with st.form(key='question_form'):
question = st.text_input("Enter your question:")
submit_button = st.form_submit_button(label='Submit')
if submit_button and question:
with st.spinner("Generating answer..."):
res = st.session_state['qa'].invoke({"query": question})
st.write("\n**Answer:**\n" + res['result'])
def process_document(file_path, graph, openai_api_key):
# Configure OpenAI
os.environ["OPENAI_API_KEY"] = openai_api_key
Settings.chunk_size = 512
# Setup LangChain LLM wrapper
llm = ChatOpenAI(temperature=0, model="gpt-4")
Settings.llm = LangChainLLM(llm=llm)
# Setup embeddings
embed_model = OpenAIEmbedding()
# Create graph store
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)
# Load and process document
documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
# Create Knowledge Graph Index
index = KnowledgeGraphIndex.from_documents(
documents=documents,
max_triplets_per_chunk=3,
storage_context=storage_context,
embed_model=embed_model,
include_embeddings=True
)
return index
if __name__ == "__main__":
main()