Spaces:
Sleeping
Sleeping
ElvarThorS
committed on
Commit
·
46d9bb2
1
Parent(s):
42ecb2b
Fixed bugs, and refactored scoring
Browse files
app/data_processing/get_station_coverage.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
from typing import List, Dict, Tuple
|
2 |
from shapely.geometry import Point, shape
|
|
|
3 |
from shapely import Polygon
|
4 |
from data_processing.get_smallAreaInfo import get_smallAreas
|
5 |
|
@@ -32,6 +33,14 @@ def get_station_coverage(
|
|
32 |
coordinates = area["geometry"]
|
33 |
geometry = Polygon(coordinates)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
# Check intersection with the station buffer
|
36 |
if geometry.intersects(station_buffer):
|
37 |
# Calculate intersection area
|
|
|
1 |
from typing import List, Dict, Tuple
|
2 |
from shapely.geometry import Point, shape
|
3 |
+
from shapely.validation import make_valid
|
4 |
from shapely import Polygon
|
5 |
from data_processing.get_smallAreaInfo import get_smallAreas
|
6 |
|
|
|
33 |
coordinates = area["geometry"]
|
34 |
geometry = Polygon(coordinates)
|
35 |
|
36 |
+
# Validate and fix invalid geometries
|
37 |
+
if not geometry.is_valid:
|
38 |
+
print(f"Invalid geometry detected for Area ID: {area['id']}. Attempting to fix.")
|
39 |
+
geometry = make_valid(geometry)
|
40 |
+
|
41 |
+
# Simplify geometry to avoid potential issues with highly complex polygons
|
42 |
+
geometry = geometry.simplify(tolerance=0.01, preserve_topology=True)
|
43 |
+
|
44 |
# Check intersection with the station buffer
|
45 |
if geometry.intersects(station_buffer):
|
46 |
# Calculate intersection area
|
app/data_processing/point_scoring.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import csv
|
2 |
import os
|
3 |
-
|
4 |
|
5 |
def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_age) -> float:
|
6 |
"""
|
@@ -36,15 +36,19 @@ def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_a
|
|
36 |
|
37 |
# TODO: take into account fjoldi starfandi, if there are more people who work than live => many people need to get there, also works the other way around.
|
38 |
total_score = 0
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
aggregated_age_distribution = {}
|
43 |
aggregated_income_distribution = {}
|
|
|
44 |
|
45 |
for smsv in cov_smsv:
|
46 |
smsv_info = df_features[df_features["smallAreaId"] == smsv["id"]]
|
47 |
|
|
|
|
|
|
|
48 |
# Get age distribution for the year 2024
|
49 |
age_dist = smsv_info["age_distribution"].iloc[0].get(2024, {}) # only interested in 2024 for current score
|
50 |
|
@@ -67,29 +71,53 @@ def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_a
|
|
67 |
else:
|
68 |
aggregated_income_distribution[income_group] = proportion
|
69 |
|
70 |
-
# Calculate density
|
71 |
-
|
72 |
-
|
73 |
-
# Add to total score
|
74 |
-
total_score += (density_score) * smsv["coverage_percentage"] # TODO: Area of the cricle * percent covered / total area of the small area
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
total_score += age_score
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
return {"total_score": total_score,
|
85 |
-
"income_score":
|
86 |
-
"age_score":
|
87 |
-
"density_score":
|
88 |
"age_data": aggregated_age_distribution,
|
89 |
-
"income_data": aggregated_income_distribution
|
|
|
90 |
}
|
91 |
|
92 |
-
def get_age_score(
|
93 |
"""
|
94 |
Calculate a score based on age distribution.
|
95 |
|
@@ -127,17 +155,17 @@ def get_age_score(proportional_age_distribution):
|
|
127 |
}
|
128 |
|
129 |
# Calculate the weighted sum of the age distribution
|
130 |
-
weighted_sum = sum(
|
131 |
|
132 |
# Normalize the score by the total population
|
133 |
-
total_population = sum(
|
134 |
if total_population == 0:
|
135 |
return 0
|
136 |
|
137 |
return weighted_sum / total_population
|
138 |
|
139 |
|
140 |
-
def get_income_score(
|
141 |
"""
|
142 |
Calculate a score based on income distribution.
|
143 |
|
@@ -166,10 +194,10 @@ def get_income_score(proportional_income_distribution):
|
|
166 |
}
|
167 |
|
168 |
# Calculate the weighted sum of the income distribution
|
169 |
-
weighted_sum = sum(
|
170 |
|
171 |
# Normalize the score by the total population
|
172 |
-
total_population = sum(
|
173 |
if total_population == 0:
|
174 |
return 0
|
175 |
|
|
|
1 |
import csv
|
2 |
import os
|
3 |
+
from shapely.validation import make_valid
|
4 |
|
5 |
def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_age) -> float:
|
6 |
"""
|
|
|
36 |
|
37 |
# TODO: take into account fjoldi starfandi, if there are more people who work than live => many people need to get there, also works the other way around.
|
38 |
total_score = 0
|
39 |
+
total_income_score = 0
|
40 |
+
total_density_score = 0
|
41 |
+
total_age_score = 0
|
42 |
aggregated_age_distribution = {}
|
43 |
aggregated_income_distribution = {}
|
44 |
+
small_area_contributions = {}
|
45 |
|
46 |
for smsv in cov_smsv:
|
47 |
smsv_info = df_features[df_features["smallAreaId"] == smsv["id"]]
|
48 |
|
49 |
+
# Get geometry of the small area
|
50 |
+
geometry = smsv_info["geometry"].iloc[0]
|
51 |
+
|
52 |
# Get age distribution for the year 2024
|
53 |
age_dist = smsv_info["age_distribution"].iloc[0].get(2024, {}) # only interested in 2024 for current score
|
54 |
|
|
|
71 |
else:
|
72 |
aggregated_income_distribution[income_group] = proportion
|
73 |
|
74 |
+
# Calculate density score
|
75 |
+
density_contribution = smsv_info["density"].iloc[0] * w_density * smsv["small_zone_percentage"] * 100
|
|
|
|
|
|
|
76 |
|
77 |
+
# Calculate age score
|
78 |
+
age_contribution = get_age_score(age_dist) * w_age * smsv["small_zone_percentage"]
|
|
|
79 |
|
80 |
+
# Calculate income score
|
81 |
+
income_contribution = get_income_score(income_dist) * w_income * smsv["small_zone_percentage"]
|
82 |
+
|
83 |
+
# Total contribution for this small area
|
84 |
+
area_score = density_contribution + age_contribution + income_contribution
|
85 |
+
total_score += area_score
|
86 |
+
|
87 |
+
# Total age score
|
88 |
+
total_age_score += age_contribution
|
89 |
+
# Total income score
|
90 |
+
total_income_score += income_contribution
|
91 |
+
# Total density score
|
92 |
+
total_density_score += density_contribution
|
93 |
+
|
94 |
+
# Store contribution data for this small area
|
95 |
+
small_area_contributions[smsv["id"]] = {
|
96 |
+
"density_score": density_contribution,
|
97 |
+
"age_score": age_contribution,
|
98 |
+
"income_score": income_contribution,
|
99 |
+
"total_score": area_score,
|
100 |
+
"geometry": geometry,
|
101 |
+
}
|
102 |
+
|
103 |
+
# # Calculate age score
|
104 |
+
# age_score = get_age_score(aggregated_age_distribution) * w_age
|
105 |
+
# total_score += age_score
|
106 |
+
|
107 |
+
# # Calculate income score
|
108 |
+
# income_score = get_income_score(aggregated_income_distribution) * w_income
|
109 |
+
# total_score += income_score
|
110 |
|
111 |
return {"total_score": total_score,
|
112 |
+
"income_score": total_income_score,
|
113 |
+
"age_score": total_age_score,
|
114 |
+
"density_score": total_density_score,
|
115 |
"age_data": aggregated_age_distribution,
|
116 |
+
"income_data": aggregated_income_distribution,
|
117 |
+
"small_area_contributions": small_area_contributions,
|
118 |
}
|
119 |
|
120 |
+
def get_age_score(age_distribution):
|
121 |
"""
|
122 |
Calculate a score based on age distribution.
|
123 |
|
|
|
155 |
}
|
156 |
|
157 |
# Calculate the weighted sum of the age distribution
|
158 |
+
weighted_sum = sum(age_distribution.get(age, 0) * weight for age, weight in age_weights.items())
|
159 |
|
160 |
# Normalize the score by the total population
|
161 |
+
total_population = sum(age_distribution.values())
|
162 |
if total_population == 0:
|
163 |
return 0
|
164 |
|
165 |
return weighted_sum / total_population
|
166 |
|
167 |
|
168 |
+
def get_income_score(income_distribution):
|
169 |
"""
|
170 |
Calculate a score based on income distribution.
|
171 |
|
|
|
194 |
}
|
195 |
|
196 |
# Calculate the weighted sum of the income distribution
|
197 |
+
weighted_sum = sum(income_distribution.get(income_class, 0) * weight for income_class, weight in income_weights.items())
|
198 |
|
199 |
# Normalize the score by the total population
|
200 |
+
total_population = sum(income_distribution.values())
|
201 |
if total_population == 0:
|
202 |
return 0
|
203 |
|