Dylan-Kaneshiro committed on
Commit
8fb353c
1 Parent(s): 0aa5d4d

Create create_query_engine.py

Browse files
Files changed (1) hide show
  1. create_query_engine.py +56 -0
create_query_engine.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sqlalchemy
2
+
3
+ from langchain.document_loaders import PyPDFLoader
4
+
5
+ import pandas as pd
6
+
7
+ from llama_index.objects import (
8
+ SQLTableNodeMapping,
9
+ ObjectIndex,
10
+ SQLTableSchema,
11
+ )
12
+ from llama_index import SQLDatabase
13
+ from llama_index.indices.vector_store.base import VectorStoreIndex
14
+ from llama_index.indices.struct_store import SQLTableRetrieverQueryEngine
15
+
16
def read_context_pdf(file):
    """Parse a context PDF into ``[table_name, description]`` pairs.

    The text of every page is concatenated and interpreted as a
    ``;``-separated list of entries, each written as
    ``table_name: description``.

    Args:
        file: Object whose ``name`` attribute is the path of the PDF on disk.

    Returns:
        A list of two-element lists ``[table_name, description]``, both
        stripped of surrounding whitespace.

    Raises:
        ValueError: If a non-blank entry contains no ``:`` separator.
    """
    loader = PyPDFLoader(file.name)
    text = "".join(page.page_content for page in loader.load())

    entries = []
    for raw_entry in text.split(";"):
        entry = raw_entry.strip()
        if not entry:
            # A trailing ";" or stray whitespace yields an empty chunk; it is
            # not a table entry and would otherwise break callers that index
            # into each pair.
            continue

        # Split on the FIRST colon only so a description may itself contain
        # colons without being truncated.
        table_name, separator, description = entry.partition(":")
        if not separator:
            raise ValueError(
                f"Malformed context entry (expected 'table: description'): {entry!r}"
            )
        entries.append([table_name.strip(), description.strip()])
    return entries
24
+
25
def query(engine, sql_query):
    """Run a raw SQL string on ``engine`` and return the result as a DataFrame.

    The statement is executed exactly as given (wrapped with
    ``sqlalchemy.text``) inside an ``engine.begin()`` block.

    Args:
        engine: SQLAlchemy engine to run the statement on.
        sql_query: SQL text to execute.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_sql_query``.
    """
    with engine.begin() as connection:
        statement = sqlalchemy.text(sql_query)
        result_frame = pd.read_sql_query(statement, connection)
    return result_frame
29
+
30
def create_query_engine(context_pdf, username, password, host, port, mydatabase):
    """Build a table-retrieving SQL query engine for a PostgreSQL database.

    The context PDF supplies one description per table; those descriptions
    are indexed so the query engine can retrieve the most relevant tables
    (top 3 by similarity) for a question.

    Args:
        context_pdf: File object passed to ``read_context_pdf``.
        username: Database user name.
        password: Database password; may contain URL-special characters.
        host: Database host.
        port: Database port (int or numeric string); empty/None omits it.
        mydatabase: Name of the database to connect to.

    Returns:
        A tuple ``(query_engine, engine, "Connection good")``.

    Raises:
        ValueError: If a non-blank context entry has no description part.
    """
    # Parse context pdf
    context = read_context_pdf(context_pdf)

    # Create the SQL engine. URL.create escapes each component, so passwords
    # containing characters such as "@", "/", ":" or "#" no longer corrupt
    # the connection URL the way raw string interpolation did.
    db_url = sqlalchemy.engine.URL.create(
        drivername="postgresql+psycopg2",
        username=username,
        password=password,
        host=host,
        port=int(port) if port not in (None, "") else None,
        database=mydatabase,
    )
    engine = sqlalchemy.create_engine(db_url)
    sql_database = SQLDatabase(engine)

    # Create context mapping
    table_node_mapping = SQLTableNodeMapping(sql_database)
    table_schema_objs = []
    for entry in context:
        if not any(part.strip() for part in entry):
            # Blank entry (e.g. produced by a trailing ";" in the PDF text).
            continue
        if len(entry) < 2:
            raise ValueError(
                f"Malformed context entry (expected 'table: description'): {entry!r}"
            )
        # Re-join any extra pieces so a description containing ":" is kept
        # whole rather than cut at its first colon.
        description = ":".join(entry[1:]).strip()
        table_schema_objs.append(
            SQLTableSchema(table_name=entry[0].strip(), context_str=description)
        )

    obj_index = ObjectIndex.from_objects(
        table_schema_objs,
        table_node_mapping,
        VectorStoreIndex,
    )

    query_engine = SQLTableRetrieverQueryEngine(
        sql_database, obj_index.as_retriever(similarity_top_k=3)
    )

    # NOTE(review): the status string is fixed; no explicit connectivity
    # check is performed in this function.
    return query_engine, engine, "Connection good"
55
+
56
+