Carl Boettiger committed on
Commit 4c44a48 • 1 Parent(s): 65f8ab7
Files changed (3)
  1. .gitattributes +1 -0
  2. app.py +3 -3
  3. chatmap.py +52 -90
.gitattributes CHANGED
@@ -37,3 +37,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.html filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
 *.pmtiles filter=lfs diff=lfs merge=lfs -text
+*.duckdb filter=lfs diff=lfs merge=lfs -text
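The new *.duckdb LFS rule covers the pad.duckdb database that the rewritten chatmap.py (below) opens read-only. The commit itself does not show how that file was produced; as a hedged sketch only, such a database could plausibly be materialized from the same PAD-US parquet that the previous version of chatmap.py queried over HTTP. The table name and layout inside the real pad.duckdb may differ.

# Hypothetical one-off build script (not part of this commit): materialize the
# remote PAD-US parquet into a local DuckDB file so the app can open it
# read-only, without network access at query time.
import duckdb

con = duckdb.connect("pad.duckdb")   # creates the file if it does not exist
con.install_extension("httpfs")
con.load_extension("httpfs")

con.sql(
    "CREATE OR REPLACE TABLE pad AS "
    "SELECT * FROM read_parquet('https://data.source.coop/cboettig/pad-us-3/pad-mobi.parquet')"
)
con.close()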
app.py CHANGED
@@ -236,9 +236,9 @@ with st.sidebar:
         m.add_cog_layer("https://data.source.coop/cboettig/carbon/cogs/irrecoverable_c_total_2018.tif",
                         palette="purples", name="Irrecoverable Carbon", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
 
-    if st.toggle("Managable Carbon"):
-        m.add_cog_layer("https://data.source.coop/cboettig/carbon/cogs/managable_c_total_2018.tif",
-                        palette="greens", name="Managable Carbon", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
+    if st.toggle("Manageable Carbon"):
+        m.add_cog_layer("https://data.source.coop/cboettig/carbon/cogs/manageable_c_total_2018.tif",
+                        palette="greens", name="Manageable Carbon", transparent_bg=True, opacity = 0.8, zoom_to_layer=False)
 
     if st.toggle("Human Impact"):
         hi="https://data.source.coop/vizzuality/hfp-100/hfp_2021_100m_v1-2_cog.tif"
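For context, the toggles edited above follow the usual leafmap-in-Streamlit pattern: a sidebar toggle guards each call that adds a Cloud-Optimized GeoTIFF layer to the map. A minimal sketch of that pattern, assuming leafmap's folium backend (the map construction and rendering lines are not part of this hunk and are illustrative only):

# Illustrative sketch of the toggle + COG layer pattern used in app.py.
import streamlit as st
import leafmap.foliumap as leafmap   # assumed backend

m = leafmap.Map()
with st.sidebar:
    if st.toggle("Manageable Carbon"):
        m.add_cog_layer(
            "https://data.source.coop/cboettig/carbon/cogs/manageable_c_total_2018.tif",
            palette="greens", name="Manageable Carbon",
            transparent_bg=True, opacity=0.8, zoom_to_layer=False)
m.to_streamlit()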
chatmap.py CHANGED
@@ -1,96 +1,58 @@
+# This example does not use a langchain agent.
+# The langchain SQL chain has knowledge of the database, but doesn't interact with it beyond initialization.
+# The output of the SQL chain is parsed separately and passed to `duckdb.sql()` by streamlit.
+
 import streamlit as st
-from openai import OpenAI
-import duckdb
+
+## Database connection
+from sqlalchemy import create_engine
+from langchain.sql_database import SQLDatabase
+db_uri = "duckdb:///pad.duckdb"
+engine = create_engine(db_uri, connect_args={'read_only': True})
+db = SQLDatabase(engine, view_support=True)
 
 import duckdb
-duckdb.install_extension("spatial")
-duckdb.load_extension("spatial")
-duckdb.install_extension("httpfs")
-duckdb.load_extension("httpfs")
-
-duckdb.sql("create or replace view pad as select * from read_parquet('https://data.source.coop/cboettig/pad-us-3/pad-mobi.parquet')")
-
-st.title("ChatGPT SQL Assistant")
-
-# Set OpenAI API key from Streamlit secrets
-client = OpenAI(api_key=st.secrets["OPENAI_API_KEY"])
-
-# Set a default model
-if "openai_model" not in st.session_state:
-    st.session_state["openai_model"] = "gpt-3.5-turbo"
-    # "gpt-4"
-
-# Initialize chat history
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-
-setup = '''
-You are a database administrator, and expert in SQL. You will be helping me write complex SQL queries. I will explain you my needs, you will generate SQL queries against my database.
-
-My application does: Conservation prioritization of protected areas to help meet US 30x30 conservation goals.
-
-Please reply only with the SQL code that I will need to execute. Do not include an explanation of the code.
-
-The database is a POSTGIS Postgres database, please take it into consideration when generating PLSQL/SQL. Please avoid ST_Within queries if possible, because they are so slow.
-
-I will provide you with a description of the structure of my tables. You must remember them and use them for generating SQL queries. Once you read them all, just answer OK, nothing else.
-
-Here are the tables :
-
-Table "pad"
-┌─────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
-│ column_name │ column_type │  null   │   key   │ default │  extra  │
-│   varchar   │   varchar   │ varchar │ varchar │ varchar │ varchar │
-├─────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
-│ FID         │ INTEGER     │ YES     │ NULL    │ NULL    │ NULL    │
-│ time        │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ rsr         │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
-│ richness    │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
-│ bucket      │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ FeatClass   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Mang_Name   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Mang_Type   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Des_Tp      │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Pub_Access  │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ GAP_Sts     │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ IUCN_Cat    │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ Unit_Nm     │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
-│ area        │ DOUBLE      │ YES     │ NULL    │ NULL    │ NULL    │
-│ geometry    │ BLOB        │ YES     │ NULL    │ NULL    │ NULL    │
-└─────────────┴─────────────┴─────────┴─────────┴─────────┴─────────┘
-'''
-
-# Display chat messages from history on app rerun
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-
-# Accept user input
-if prompt := st.chat_input("What is the total area in each GAP_Sts?"):
-    # Add user message to chat history
-    st.session_state.messages.append({"role": "system", "content": setup})
-
-    st.session_state.messages.append({"role": "user", "content": prompt})
-    # Display user message in chat message container
-    # with st.chat_message("user"):
-    #st.markdown(prompt)
-
-    # Display assistant response in chat message container
+
+con = duckdb.connect("pad.duckdb", read_only=True)
+con.install_extension("spatial")
+con.load_extension("spatial")
+
+## ChatGPT Connection
+from langchain_openai import ChatOpenAI
+chatgpt_llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
+chatgpt4_llm = ChatOpenAI(model="gpt-4", temperature=0, api_key=st.secrets["OPENAI_API_KEY"])
+
+
+# Requires an ollama server running locally
+from langchain_community.llms import Ollama
+## # from langchain_community.llms import ChatOllama
+ollama_llm = Ollama(model="duckdb-nsql", temperature=0)
+
+models = {"ollama": ollama_llm, "chatgpt3.5": chatgpt_llm, "chatgpt4": chatgpt4_llm}
+with st.sidebar:
+    choice = st.radio("Select an LLM:", models)
+    llm = models[choice]
+
+## A SQL chain
+from langchain.chains import create_sql_query_chain
+chain = create_sql_query_chain(llm, db)
+
+# agent does not work
+# agent = create_sql_agent(llm, db=db, verbose=True)
+
+if prompt := st.chat_input():
+    st.chat_message("user").write(prompt)
     with st.chat_message("assistant"):
-        stream = client.chat.completions.create(
-            model=st.session_state["openai_model"],
-            messages=[
-                {"role": m["role"], "content": m["content"]}
-                for m in st.session_state.messages
-            ],
-            stream=True,
-        )
-        response = st.write_stream(stream)
-        st.divider()
-        df = duckdb.sql(response).df()
-        st.table(df)
-        st.session_state.messages.append({"role": "assistant", "content": response})
+        response = chain.invoke({"question": prompt})
+        st.write(response)
+
+        tbl = con.sql(response).to_df()
+        st.dataframe(tbl)
+
+
+# duckdb_sql fails but chatgpt3.5 succeeds with a query like:
+# use the st_area and st_GeomFromWKB functions to compute the area of the Shape column in the fee table, and then use that to compute the total area under each GAP_Sts category
+
 
+# Federal agencies are identified as 'FED' in the Mang_Type column in the 'combined' data table. The Mang_Name column indicates the different agencies. Which federal agencies manage the greatest area of GAP_Sts 1 or 2 land?
+
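The two comments at the end of the new file record natural-language prompts that the SQL chain is expected to translate into queries. As a rough sanity check, here is the kind of hand-written query the removed example prompt ("What is the total area in each GAP_Sts?") corresponds to, written against the 'pad' schema documented in the removed version of this file. The tables actually present in pad.duckdb (e.g. 'fee' or 'combined') may have different names and columns, so treat this as an illustrative sketch only.

# Illustrative check, assuming pad.duckdb exposes a 'pad' table/view with
# GAP_Sts (VARCHAR) and area (DOUBLE) columns as in the old schema dump.
import duckdb

con = duckdb.connect("pad.duckdb", read_only=True)
query = """
    SELECT GAP_Sts, SUM(area) AS total_area
    FROM pad
    GROUP BY GAP_Sts
    ORDER BY total_area DESC
"""
print(con.sql(query).to_df())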