Commit
·
52eb91c
1
Parent(s):
983a080
feat: created queries for talk to ipcc
Browse files
climateqa/engine/talk_to_data/ipcc/queries.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import TypedDict, Optional
|
| 2 |
+
|
| 3 |
+
from climateqa.engine.talk_to_data.ipcc.config import IPCC_DATASET_URL, MACRO_COUNTRIES
|
| 4 |
+
|
| 5 |
+
class IndicatorPerYearAtLocationQueryParams(TypedDict, total=False):
    """Inputs accepted by ``indicator_per_year_at_location_query``.

    The type is declared with ``total=False``, so a dictionary of this type
    may omit any of the keys listed below.

    Attributes:
        indicator_column (str): Column holding the climate indicator values.
        latitude (str): Latitude of the queried location.
        longitude (str): Longitude of the queried location.
        country_code (str): Code of the country to query.
        admin1 (Optional[str]): Administrative region, when one is provided.
    """

    indicator_column: str
    latitude: str
    longitude: str
    country_code: str
    admin1: Optional[str]
|
| 21 |
+
|
| 22 |
+
def indicator_per_year_at_location_query(
    table: str, params: IndicatorPerYearAtLocationQueryParams
) -> str:
    """Build the SQL query for the yearly evolution of an indicator at one location.

    For a country listed in ``MACRO_COUNTRIES`` the query reads
    ``<country_code>_macro.parquet`` and filters rows on ``admin1``. For any
    other country it reads ``<country_code>.parquet``, filters on the exact
    latitude/longitude and averages the indicator per (scenario, year).
    Both variants keep only rows with ``year >= 1950`` and sort the result
    by year, then scenario.

    Args:
        table (str): SQL table of the indicator; lower-cased to build the
            parquet path.
        params (IndicatorPerYearAtLocationQueryParams): Dictionary with the
            required params for the query.

    Returns:
        str: The SQL query string, or an empty string if required parameters
        are missing. ``indicator_column``, ``latitude``, ``longitude`` and
        ``country_code`` are always required; ``admin1`` is additionally
        required when the country is in ``MACRO_COUNTRIES``.
    """
    indicator_column = params.get("indicator_column")
    latitude = params.get("latitude")
    longitude = params.get("longitude")
    country_code = params.get("country_code")
    admin1 = params.get("admin1")

    if not all([indicator_column, latitude, longitude, country_code]):
        return ""

    if country_code in MACRO_COUNTRIES:
        # The macro query filters on admin1 only; without it the filter
        # would compare against the literal string 'None'.
        if not admin1:
            return ""
        # Double single quotes so region names containing an apostrophe
        # stay inside the SQL string literal.
        admin1_literal = str(admin1).replace("'", "''")
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
        sql_query = f"""
        SELECT year, scenario, {indicator_column}
        FROM {table_path}
        WHERE admin1 = '{admin1_literal}' AND year >= 1950
        ORDER BY year, scenario
        """
    else:
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
        sql_query = f"""
        SELECT year, scenario, AVG({indicator_column}) AS {indicator_column}
        FROM {table_path}
        WHERE latitude = {latitude} AND longitude = {longitude} AND year >= 1950
        GROUP BY scenario, year
        ORDER BY year, scenario
        """
    return sql_query.strip()
|
| 62 |
+
|
| 63 |
+
class IndicatorForGivenYearQueryParams(TypedDict, total=False):
    """Inputs accepted by ``indicator_for_given_year_query``.

    The type is declared with ``total=False``, so a dictionary of this type
    may omit any of the keys listed below.

    Attributes:
        indicator_column (str): Column holding the climate indicator values.
        year (str): Year for which the values are requested.
        country_code (str): Code of the country to query.
    """

    indicator_column: str
    year: str
    country_code: str
|
| 75 |
+
|
| 76 |
+
def indicator_for_given_year_query(
    table: str, params: IndicatorForGivenYearQueryParams
) -> str:
    """Build the SQL query for an indicator's values across locations in one year.

    For a country listed in ``MACRO_COUNTRIES`` the query reads
    ``<country_code>_macro.parquet`` and joins it with ``coordinates.parquet``
    on ``admin1`` and ``country_code`` to return the indicator together with
    the latitude, longitude, admin1 and scenario. For any other country it
    reads ``<country_code>.parquet`` and averages the indicator per
    (latitude, longitude, scenario). Both variants keep only the requested
    year and sort by latitude, longitude, then scenario.

    Args:
        table (str): SQL table of the indicator; lower-cased to build the
            parquet path.
        params (IndicatorForGivenYearQueryParams): Dictionary with the
            required params for the query. A missing or empty ``year``
            defaults to 2050.

    Returns:
        str: The SQL query string, or an empty string if ``indicator_column``
        or ``country_code`` is missing or ``year`` is not an integer value.
    """
    indicator_column = params.get("indicator_column")
    country_code = params.get("country_code")

    if not all([indicator_column, country_code]):
        return ""

    # The year is interpolated unquoted into the SQL text, so only accept
    # something that converts to an integer.
    try:
        year = int(params.get("year") or 2050)
    except (TypeError, ValueError, OverflowError):
        return ""

    if country_code in MACRO_COUNTRIES:
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}_macro.parquet'"
        sql_query = f"""
        SELECT {indicator_column}, c.latitude, c.longitude, c.admin1, scenario
        FROM {table_path} AS t
        RIGHT JOIN '{IPCC_DATASET_URL}/coordinates.parquet' AS c
        ON c.admin1 = t.admin1 AND c.country_code = t.country_code
        WHERE year = {year}
        ORDER BY latitude, longitude, scenario
        """
    else:
        table_path = f"'{IPCC_DATASET_URL}/{table.lower()}/{country_code}.parquet'"
        sql_query = f"""
        SELECT AVG({indicator_column}) AS {indicator_column}, latitude, longitude, scenario
        FROM {table_path}
        WHERE year = {year}
        GROUP BY latitude, longitude, scenario
        ORDER BY latitude, longitude, scenario
        """
    return sql_query.strip()
|