Carl Boettiger committed on
Commit 4c44a48 • 1 Parent(s): 65f8ab7
Files changed (3)
  1. .gitattributes +1 -0
  2. app.py +3 -3
  3. chatmap.py +52 -90
.gitattributes CHANGED
@@ -37,3 +37,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.html filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
 *.pmtiles filter=lfs diff=lfs merge=lfs -text
+*.duckdb filter=lfs diff=lfs merge=lfs -text
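The new *.duckdb LFS rule covers the pad.duckdb database that the rewritten chatmap.py (below) opens read-only. The commit itself does not show how that file was produced; as a hedged sketch only, such a database could plausibly be materialized from the same PAD-US parquet that the previous version of chatmap.py queried over HTTP. The table name and layout inside the real pad.duckdb may differ.

# Hypothetical one-off build script (not part of this commit): materialize the
# remote PAD-US parquet into a local DuckDB file so the app can open it
# read-only, without network access at query time.
import duckdb

con = duckdb.connect("pad.duckdb")   # creates the file if it does not exist
con.install_extension("httpfs")
con.load_extension("httpfs")

con.sql(
    "CREATE OR REPLACE TABLE pad AS "
    "SELECT * FROM read_parquet('https://data.source.coop/cboettig/pad-us-3/pad-mobi.parquet')"
)
con.close()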
app.py CHANGED
@@ -236,9 +236,9 @@ with st.sidebar:
         m.add_cog_layer("https://data.source.coop/cboettig/carbon/cogs/irrecoverable_c_total_2018.tif",
                         palette="purples", name="Irrecoverable Carbon", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
 
-    if st.toggle("Managable Carbon"):
-        m.add_cog_layer("https://data.source.coop/cboettig/carbon/cogs/managable_c_total_2018.tif",
-                        palette="greens", name="Managable Carbon", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
+    if st.toggle("Manageable Carbon"):
+        m.add_cog_layer("https://data.source.coop/cboettig/carbon/cogs/manageable_c_total_2018.tif",
+                        palette="greens", name="Manageable Carbon", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
 
     if st.toggle("Human Impact"):
         hi="https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
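For context, the toggles edited above follow the usual leafmap-in-Streamlit pattern: a sidebar toggle guards each call that adds a Cloud-Optimized GeoTIFF layer to the map. A minimal sketch of that pattern, assuming leafmap's folium backend (the map construction and rendering lines are not part of this hunk and are illustrative only):

# Illustrative sketch of the toggle + COG layer pattern used in app.py.
import streamlit as st
import leafmap.foliumap as leafmap   # assumed backend

m = leafmap.Map()
with st.sidebar:
    if st.toggle("Manageable Carbon"):
        m.add_cog_layer(
            "https://data.source.coop/cboettig/carbon/cogs/manageable_c_total_2018.tif",
            palette="greens", name="Manageable Carbon",
            transparent_bg=True, opacity=0.8, zoom_to_layer=False)
m.to_streamlit()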
chatmap.py CHANGED
@@ -1,96 +1,58 @@
+# This example does not use a langchain agent.
+# The langchain SQL chain has knowledge of the database, but doesn't interact with it beyond initialization.
+# The output of the SQL chain is parsed separately and passed to `duckdb.sql()` by streamlit.
+
 import streamlit as st
-from openai import OpenAI
-import duckdb
+
+## Database connection
+from sqlalchemy import create_engine
+from langchain.sql_database import SQLDatabase
+db_uri = "duckdb:///pad.duckdb"
+engine = create_engine(db_uri, connect_args={'read_only': True})
+db = SQLDatabase(engine, view_support=True)
 
 import duckdb
-duckdb.install_extension("spatial")
-duckdb.load_extension("spatial")
-duckdb.install_extension("httpfs")
-duckdb.load_extension("httpfs")
-
-duckdb.sql("create or replace view pad as select * from read_parquet('https://data.source.coop/cboettig/pad-us-3/pad-mobi.parquet')")
-
-st.title("ChatGPT SQL Assistant")
-
-# Set OpenAI API key from Streamlit secrets
-client = OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
-
-# Set a default model
-if "openai_model" not in st.session_state:
-    st.session_state["openai_model"] = "gpt-3.5-turbo"
-    # "gpt-4"
-
-# Initialize chat history
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-
-setup = '''
-You are a database administrator, and expert in SQL. You will be helping me write complex SQL queries. I will explain you my needs, you will generate SQL queries against my database.
-
-My application does: Conservation prioritization of protected areas to help meet US 30x30 conservation goals.
-
-Please reply only with the SQL code that I will need to execute. Do not include an explanation of the code.
-
-The database is a POSTGIS Postgres database, please take it into consideration when generating PLSQL/SQL. Please avoid ST_Within queries if possible, because they are so slow.
-
-I will provide you with a description of the structure of my tables. You must remember them and use them for generating SQL queries. Once you read them all, just answer OK, nothing else.
-
-Here are the tables :
-
-Table "pad"
-┌─────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
-│ column_name │ column_type │  null   │   key   │ default │  extra  │
-│   varchar   │   varchar   │ varchar │ varchar │ varchar │ varchar │
-├─────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
-│ FID         │ INTEGER     │ YES     │ NULL    │ NULL    │ NULL    │
-│ time        │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ rsr         │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
-│ richness    │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
-│ bucket      │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ FeatClass   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Mang_Name   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Mang_Type   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Des_Tp      │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Pub_Access  │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ GAP_Sts     │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ IUCN_Cat    │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Unit_Nm     │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ area        │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
-│ geometry    │ BLOB        │ YES     │ NULL    │ NULL    │ NULL    │
-└─────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┘
-'''
-
-# Display chat messages from history on app rerun
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-
-# Accept user input
-if prompt := st.chat_input("What is the total area in each GAP_Sts?"):
-    # Add user message to chat history
-    st.session_state.messages.append({"role": "system", "content": setup})
-
-    st.session_state.messages.append({"role": "user", "content": prompt})
-    # Display user message in chat message container
-    # with st.chat_message("user"):
-    #st.markdown(prompt)
-
-    # Display assistant response in chat message container
+
+con = duckdb.connect("pad.duckdb", read_only=True)
+con.install_extension("spatial")
+con.load_extension("spatial")
+
+## ChatGPT Connection
+from langchain_openai import ChatOpenAI
+chatgpt_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
+chatgpt4_llm = ChatOpenAI(model="gpt-4", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
+
+
+# Requires an ollama server running locally
+from langchain_community.llms import Ollama
+## # from langchain_community.llms import ChatOllama
+ollama_llm = Ollama(model="duckdb-nsql", temperature=0)
+
+models = {"ollama": ollama_llm, "chatgpt3.5": chatgpt_llm, "chatgpt4": chatgpt4_llm}
+with st.sidebar:
+    choice = st.radio("Select an LLM:", models)
+    llm = models[choice]
+
+## A SQL chain
+from langchain.chains import create_sql_query_chain
+chain = create_sql_query_chain(llm, db)
+
+# agent does not work
+# agent = create_sql_agent(llm, db=db, verbose=True)
+
+if prompt := st.chat_input():
+    st.chat_message("user").write(prompt)
     with st.chat_message("assistant"):
-        stream = client.chat.completions.create(
-            model=st.session_state["openai_model"],
-            messages=[
-                {"role": m["role"], "content": m["content"]}
-                for m in st.session_state.messages
-            ],
-            stream=True,
-        )
-        response = st.write_stream(stream)
-        st.divider()
-        df = duckdb.sql(response).df()
-        st.table(df)
-        st.session_state.messages.append({"role": "assistant", "content": response})
+        response = chain.invoke({"question": prompt})
+        st.write(response)
+
+        tbl = con.sql(response).to_df()
+        st.dataframe(tbl)
+
+
+# duckdb_sql fails but chatgpt3.5 succeeds with a query like:
+# use the st_area and st_GeomFromWKB functions to compute the area of the Shape column in the fee table, and then use that to compute the total area under each GAP_Sts category
+
 
+# Federal agencies are identified as 'FED' in the Mang_Type column in the 'combined' data table. The Mang_Name column indicates the different agencies. Which federal agencies manage the greatest area of GAP_Sts 1 or 2 land?
+
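The two comments at the end of the new file record natural-language prompts that the SQL chain is expected to translate into queries. As a rough sanity check, here is the kind of hand-written query the removed example prompt ("What is the total area in each GAP_Sts?") corresponds to, written against the 'pad' schema documented in the removed version of this file. The tables actually present in pad.duckdb (e.g. 'fee' or 'combined') may have different names and columns, so treat this as an illustrative sketch only.

# Illustrative check, assuming pad.duckdb exposes a 'pad' table/view with
# GAP_Sts (VARCHAR) and area (DOUBLE) columns as in the old schema dump.
import duckdb

con = duckdb.connect("pad.duckdb", read_only=True)
query = """
    SELECT GAP_Sts, SUM(area) AS total_area
    FROM pad
    GROUP BY GAP_Sts
    ORDER BY total_area DESC
"""
print(con.sql(query).to_df())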