Spaces:
Paused
Paused
Commit
•
59e569d
1
Parent(s):
b1335de
add in rollups calculator
Browse files- db/db_utils.py +25 -0
- rollups.py +73 -0
db/db_utils.py
CHANGED
@@ -53,6 +53,29 @@ def initialize_db(conn):
|
|
53 |
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
54 |
)
|
55 |
''')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
cursor.execute('''
|
57 |
CREATE TABLE IF NOT EXISTS results (
|
58 |
id BIGSERIAL PRIMARY KEY,
|
@@ -86,6 +109,8 @@ def initialize_db(conn):
|
|
86 |
CREATE INDEX IF NOT EXISTS idx_cleaned_word ON mappings(cleaned_word);
|
87 |
CREATE INDEX IF NOT EXISTS idx_dictionary_word ON mappings(dictionary_word);
|
88 |
CREATE INDEX IF NOT EXISTS idx_description ON dictionary(description);
|
|
|
|
|
89 |
''')
|
90 |
|
91 |
conn.commit()
|
|
|
53 |
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
54 |
)
|
55 |
''')
|
56 |
+
# Create the donations table (no-op when it already exists).
# "from" and "to" are reserved key words in PostgreSQL, so they are only
# accepted as column names when double-quoted; unquoted, this statement is a
# syntax error. Queries that reference these columns must quote them too.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS donations (
        run_key TEXT,
        run_row INTEGER,
        year TEXT,
        "from" TEXT,
        "to" TEXT,
        total_emissions_reduction REAL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
''')
|
68 |
+
cursor.execute('''
|
69 |
+
CREATE TABLE IF NOT EXISTS rollups (
|
70 |
+
run_key TEXT,
|
71 |
+
year TEXT,
|
72 |
+
donations_discount REAL,
|
73 |
+
total_emissions_reduction_pre REAL,
|
74 |
+
total_emissions_reduction REAL,
|
75 |
+
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
76 |
+
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
77 |
+
)
|
78 |
+
''')
|
79 |
cursor.execute('''
|
80 |
CREATE TABLE IF NOT EXISTS results (
|
81 |
id BIGSERIAL PRIMARY KEY,
|
|
|
109 |
CREATE INDEX IF NOT EXISTS idx_cleaned_word ON mappings(cleaned_word);
|
110 |
CREATE INDEX IF NOT EXISTS idx_dictionary_word ON mappings(dictionary_word);
|
111 |
CREATE INDEX IF NOT EXISTS idx_description ON dictionary(description);
|
112 |
+
CREATE UNIQUE INDEX IF NOT EXISTS run_row_run_key_uniq ON results(run_key text_ops,run_row int4_ops);
|
113 |
+
-- Index names are unique per schema: reusing run_row_run_key_uniq (already taken by the results index) would make IF NOT EXISTS skip this statement.
CREATE UNIQUE INDEX IF NOT EXISTS donations_run_row_run_key_uniq ON donations(run_key text_ops,run_row int4_ops);
|
114 |
''')
|
115 |
|
116 |
conn.commit()
|
rollups.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from db_utils import get_connection
|
2 |
+
import re
|
3 |
+
|
4 |
+
# Function to extract the year
def extract_year(string):
    """Return the first standalone four-digit year found in *string*.

    A year is four digits starting with ``19`` or ``20`` that sit between
    regex word boundaries. Letters, digits and underscores all count as word
    characters, so ``"run 2023"`` and ``"2023-05"`` match while ``"run_2023"``
    and ``"12023"`` do not.

    Args:
        string: Text to search, or ``None``.

    Returns:
        The matched year as a string, or ``None`` when *string* is ``None``
        or contains no such year.
    """
    # re.search() raises TypeError on None; treat a missing value as "no year".
    if string is None:
        return None

    match = re.search(r'\b(19|20)\d{2}\b', string)
    return match.group() if match else None
|
11 |
+
|
12 |
+
# Rollup script: for every distinct run_key in `results`, compute the summed
# emissions reduction, a donations discount, and the discounted total, then
# store one row per run_key in `rollups`.
db_conn = get_connection()
db_cursor = db_conn.cursor()


def _sum_emissions_reduction(cursor, where_clause, params):
    """Return sum(total_emissions_reduction) over `results` rows matching the filter.

    `where_clause` is one of the fixed SQL fragments below (never user input);
    `params` supplies its %s placeholders. SQL sum() yields NULL when no row
    matches, which arrives here as None, so that case is reported as 0 to keep
    the arithmetic below from failing.
    """
    cursor.execute(
        "SELECT sum(total_emissions_reduction) FROM results WHERE " + where_clause,
        params,
    )
    row = cursor.fetchone()
    if row is None or row[0] is None:
        return 0
    return row[0]


db_cursor.execute("SELECT name from donors")
donor_results = db_cursor.fetchall()
list_of_valid_donors = [row[0] for row in donor_results]
# An empty tuple would be rendered as "IN ()", which is invalid SQL, so the
# donor-filtered queries are skipped entirely when there are no donors.
valid_donors = tuple(list_of_valid_donors)

db_cursor.execute("SELECT distinct(run_key) FROM results")
result_rows = db_cursor.fetchall()

# The 4 digit year is parsed out of each run_key (None when it has none).
run_keys_and_years = [(row[0], extract_year(row[0])) for row in result_rows]

# Group run_keys by year once instead of rescanning the list for every run.
run_keys_by_year = {}
for run_key, year in run_keys_and_years:
    run_keys_by_year.setdefault(year, []).append(run_key)

for run_key, year in run_keys_and_years:
    # all run_keys for the same year (always contains the current run_key)
    run_keys_in_year = tuple(run_keys_by_year[year])

    if valid_donors:
        # Reductions recorded under the OTHER run_keys of the same year whose
        # donor is a known donor.
        # NOTE(review): presumably these are donations coming from this run's
        # organisation -- confirm against how `results.donor` is populated.
        donations_from = _sum_emissions_reduction(
            db_cursor,
            "run_key IN %s and run_key != %s and donor in %s",
            (run_keys_in_year, run_key, valid_donors),
        )

        # Reductions recorded under this run_key whose donor is a known donor.
        # The original query was missing the "and" between the two
        # predicates, which made it a SQL syntax error.
        donations_to = _sum_emissions_reduction(
            db_cursor,
            "run_key = %s and donor in %s",
            (run_key, valid_donors),
        )
    else:
        donations_from = 0
        donations_to = 0

    # Each side is halved because the emissions reduction of a donated item is
    # attributed to both the donor and the recipient.
    donations_from = donations_from / 2
    donations_to = donations_to / 2

    # calculate the total donations discount
    donations = donations_to + donations_from

    # calculate total_emissions_reduction_pre (before the donations discount)
    total_emissions_reduction_pre = _sum_emissions_reduction(
        db_cursor, "run_key = %s", (run_key,)
    )

    total_emissions_reduction = total_emissions_reduction_pre - donations

    # store this data in the rollups table
    db_cursor.execute("INSERT INTO rollups (run_key, year, donations_discount, total_emissions_reduction_pre, total_emissions_reduction) VALUES (%s, %s, %s, %s, %s)", (run_key, year, donations, total_emissions_reduction_pre, total_emissions_reduction, ))

# Without an explicit commit the inserted rollups are discarded when the
# connection closes (unless the connection is in autocommit mode, in which
# case this is a harmless no-op).
db_conn.commit()
db_cursor.close()
db_conn.close()
|