brightly-ai / rollups.py
beweinreich's picture
pausing here now with run_meta table
132d401
raw
history blame
No virus
3.73 kB
from db.db_utils import get_connection
import re
# Function to extract the year
def extract_year(string):
match = re.search(r'\b(19|20)\d{2}\b', string)
if match:
return match.group()
else:
return None
db_conn = get_connection()
db_cursor = db_conn.cursor()
# find a list of all the alt_spellings of all the organizations
db_cursor.execute("SELECT unnest(alt_spellings) FROM organizations")
donor_results = db_cursor.fetchall()
list_of_valid_donors = [row[0].lower() for row in donor_results]
db_cursor.execute("SELECT run_key, organization_id, year FROM run_meta")
run_meta_rows = db_cursor.fetchall()
for run_meta_row in run_meta_rows:
run_key = run_meta_row[0]
organization_id = run_meta_row[1]
year = run_meta_row[2]
donations_to = 0
donations_from = 0
print(f"Processing run_key: {run_key} and year: {year}")
# find all run_keys for the same year
db_cursor.execute("SELECT run_key FROM run_meta WHERE year = %s", (year,))
run_keys_in_year = db_cursor.fetchall()
run_keys_in_year = [row[0] for row in run_keys_in_year]
# donation discounts work as follows:
# 1. given the current organization, find all donations from the current organization to other organizations in the same year
# 2. find all received donations in the current result set (i.e. run_key)
db_cursor.execute("SELECT unnest(alt_spellings) FROM organizations where id = %s", (organization_id,))
current_org_results = db_cursor.fetchall()
current_org_results = [row[0].lower() for row in current_org_results]
print(current_org_results)
# find all donations from the current donor
db_cursor.execute("SELECT sum(total_emissions_reduction) FROM results WHERE run_key IN %s and run_key != %s and donor in %s", (tuple(run_keys_in_year), run_key, tuple(current_org_results)))
result = db_cursor.fetchone()
if result and result[0]:
donations_from = result[0]
# find all donations to the current donor
query = """
SELECT sum(total_emissions_reduction)
FROM results
WHERE run_key = %s
AND LOWER(TRIM(donor)) = ANY(%s::text[])
"""
db_cursor.execute(query, (run_key, list_of_valid_donors))
result = db_cursor.fetchone()
if result and result[0]:
donations_to = result[0]
# calculate the total_donations
print(f"donations_to: {donations_to}, donations_from: {donations_from}")
donations = donations_to + donations_from
# calculate total_emissions_reduction_pre
db_cursor.execute("SELECT sum(total_emissions_reduction) FROM results WHERE run_key = %s", (run_key,))
result = db_cursor.fetchone()
if result:
total_emissions_reduction_pre = result[0]
else:
total_emissions_reduction_pre = 0
# divide the donations_from by 2 because we attribute the emissions reductions to both the donor and the recipient
donations_discount = donations / 2
total_emissions_reduction = total_emissions_reduction_pre - donations_discount
# store this data in the rollups table
db_cursor.execute("""
INSERT INTO rollups (run_key, year, donations_discount, total_emissions_reduction_pre, total_emissions_reduction)
VALUES (%s, %s, %s, %s, %s)
ON CONFLICT (run_key)
DO UPDATE SET
year = EXCLUDED.year,
donations_discount = EXCLUDED.donations_discount,
total_emissions_reduction_pre = EXCLUDED.total_emissions_reduction_pre,
total_emissions_reduction = EXCLUDED.total_emissions_reduction
""", (run_key, year, donations_discount, total_emissions_reduction_pre, total_emissions_reduction))
db_conn.commit()
db_cursor.close()
db_conn.close()