Spaces:

Dylan-Kaneshiro
/

Text-to-SQL

Sleeping

App Files Files Community

Dylan-Kaneshiro committed on Dec 16, 2023

Commit

8fb353c

1 Parent(s): 0aa5d4d

Create create_query_engine.py

Browse files

Files changed (1) hide show

create_query_engine.py +56 -0

create_query_engine.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import sqlalchemy
+from langchain.document_loaders import PyPDFLoader
+import pandas as pd
+from llama_index.objects import (
+    SQLTableNodeMapping,
+    ObjectIndex,
+    SQLTableSchema,
+)
+from llama_index import SQLDatabase
+from llama_index.indices.vector_store.base import VectorStoreIndex
+from llama_index.indices.struct_store import SQLTableRetrieverQueryEngine
+def read_context_pdf(file):
+    filepath = file.name
+    loader = PyPDFLoader(filepath)
+    pages = loader.load()
+    content = "".join([page.page_content for page in pages])
+    content = [c.lstrip() for c in content.split(";")]
+    content = [c.split(":") for c in content]
+    return content
+def query(engine, sql_query):
+    with engine.begin() as conn:
+        df = pd.read_sql_query(sqlalchemy.text(sql_query), conn)
+    return df
+def create_query_engine(context_pdf, username, password, host, port, mydatabase):
+    # Parse context pdf
+    context = read_context_pdf(context_pdf)
+    # create sql engine
+    pg_uri = f"postgresql+psycopg2://{username}:{password}@{host}:{port}/{mydatabase}"
+    engine = sqlalchemy.create_engine(pg_uri)
+    sql_database = SQLDatabase(engine)
+    # create context mapping
+    table_node_mapping = SQLTableNodeMapping(sql_database)
+    table_schema_objs = [(SQLTableSchema(table_name=c[0], context_str=c[1])) for c in context]
+    obj_index = ObjectIndex.from_objects(
+        table_schema_objs,
+        table_node_mapping,
+        VectorStoreIndex,
+    )
+    query_engine = SQLTableRetrieverQueryEngine(
+        sql_database, obj_index.as_retriever(similarity_top_k=3)
+    )
+    return query_engine, engine, "Connection good"