ElvarThorS committed on
Commit
46d9bb2
·
1 Parent(s): 42ecb2b

Fixed bugs and refactored scoring

Browse files
app/data_processing/get_station_coverage.py CHANGED
@@ -1,5 +1,6 @@
1
  from typing import List, Dict, Tuple
2
  from shapely.geometry import Point, shape
 
3
  from shapely import Polygon
4
  from data_processing.get_smallAreaInfo import get_smallAreas
5
 
@@ -32,6 +33,14 @@ def get_station_coverage(
32
  coordinates = area["geometry"]
33
  geometry = Polygon(coordinates)
34
 
 
 
 
 
 
 
 
 
35
  # Check intersection with the station buffer
36
  if geometry.intersects(station_buffer):
37
  # Calculate intersection area
 
1
  from typing import List, Dict, Tuple
2
  from shapely.geometry import Point, shape
3
+ from shapely.validation import make_valid
4
  from shapely import Polygon
5
  from data_processing.get_smallAreaInfo import get_smallAreas
6
 
 
33
  coordinates = area["geometry"]
34
  geometry = Polygon(coordinates)
35
 
36
+ # Validate and fix invalid geometries
37
+ if not geometry.is_valid:
38
+ print(f"Invalid geometry detected for Area ID: {area['id']}. Attempting to fix.")
39
+ geometry = make_valid(geometry)
40
+
41
+ # Simplify geometry to avoid potential issues with highly complex polygons
42
+ geometry = geometry.simplify(tolerance=0.01, preserve_topology=True)
43
+
44
  # Check intersection with the station buffer
45
  if geometry.intersects(station_buffer):
46
  # Calculate intersection area
app/data_processing/point_scoring.py CHANGED
@@ -1,6 +1,6 @@
1
  import csv
2
  import os
3
-
4
 
5
  def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_age) -> float:
6
  """
@@ -36,15 +36,19 @@ def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_a
36
 
37
  # TODO: take into account fjoldi starfandi, if there are more people who work than live => many people need to get there, also works the other way around.
38
  total_score = 0
39
- income_score = 0
40
- density_score = 0
41
- age_score = 0
42
  aggregated_age_distribution = {}
43
  aggregated_income_distribution = {}
 
44
 
45
  for smsv in cov_smsv:
46
  smsv_info = df_features[df_features["smallAreaId"] == smsv["id"]]
47
 
 
 
 
48
  # Get age distribution for the year 2024
49
  age_dist = smsv_info["age_distribution"].iloc[0].get(2024, {}) # only interested in 2024 for current score
50
 
@@ -67,29 +71,53 @@ def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_a
67
  else:
68
  aggregated_income_distribution[income_group] = proportion
69
 
70
- # Calculate density scores
71
- density_score = smsv_info["density"].iloc[0] * w_density
72
-
73
- # Add to total score
74
- total_score += (density_score) * smsv["coverage_percentage"] # TODO: Area of the cricle * percent covered / total area of the small area
75
 
76
- # Calculate age score
77
- age_score = get_age_score(aggregated_age_distribution) * w_age
78
- total_score += age_score
79
 
80
- # Calculate income score
81
- income_score = get_income_score(aggregated_income_distribution) * w_income
82
- total_score += income_score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  return {"total_score": total_score,
85
- "income_score": income_score,
86
- "age_score": age_score,
87
- "density_score": density_score,
88
  "age_data": aggregated_age_distribution,
89
- "income_data": aggregated_income_distribution
 
90
  }
91
 
92
- def get_age_score(proportional_age_distribution):
93
  """
94
  Calculate a score based on age distribution.
95
 
@@ -127,17 +155,17 @@ def get_age_score(proportional_age_distribution):
127
  }
128
 
129
  # Calculate the weighted sum of the age distribution
130
- weighted_sum = sum(proportional_age_distribution.get(age, 0) * weight for age, weight in age_weights.items())
131
 
132
  # Normalize the score by the total population
133
- total_population = sum(proportional_age_distribution.values())
134
  if total_population == 0:
135
  return 0
136
 
137
  return weighted_sum / total_population
138
 
139
 
140
- def get_income_score(proportional_income_distribution):
141
  """
142
  Calculate a score based on income distribution.
143
 
@@ -166,10 +194,10 @@ def get_income_score(proportional_income_distribution):
166
  }
167
 
168
  # Calculate the weighted sum of the income distribution
169
- weighted_sum = sum(proportional_income_distribution.get(income_class, 0) * weight for income_class, weight in income_weights.items())
170
 
171
  # Normalize the score by the total population
172
- total_population = sum(proportional_income_distribution.values())
173
  if total_population == 0:
174
  return 0
175
 
 
1
  import csv
2
  import os
3
+ from shapely.validation import make_valid
4
 
5
  def score_current(station_coord, df_features, cov_smsv, w_density, w_income, w_age) -> float:
6
  """
 
36
 
37
  # TODO: take into account fjoldi starfandi, if there are more people who work than live => many people need to get there, also works the other way around.
38
  total_score = 0
39
+ total_income_score = 0
40
+ total_density_score = 0
41
+ total_age_score = 0
42
  aggregated_age_distribution = {}
43
  aggregated_income_distribution = {}
44
+ small_area_contributions = {}
45
 
46
  for smsv in cov_smsv:
47
  smsv_info = df_features[df_features["smallAreaId"] == smsv["id"]]
48
 
49
+ # Get geometry of the small area
50
+ geometry = smsv_info["geometry"].iloc[0]
51
+
52
  # Get age distribution for the year 2024
53
  age_dist = smsv_info["age_distribution"].iloc[0].get(2024, {}) # only interested in 2024 for current score
54
 
 
71
  else:
72
  aggregated_income_distribution[income_group] = proportion
73
 
74
+ # Calculate density score
75
+ density_contribution = smsv_info["density"].iloc[0] * w_density * smsv["small_zone_percentage"] * 100
 
 
 
76
 
77
+ # Calculate age score
78
+ age_contribution = get_age_score(age_dist) * w_age * smsv["small_zone_percentage"]
 
79
 
80
+ # Calculate income score
81
+ income_contribution = get_income_score(income_dist) * w_income * smsv["small_zone_percentage"]
82
+
83
+ # Total contribution for this small area
84
+ area_score = density_contribution + age_contribution + income_contribution
85
+ total_score += area_score
86
+
87
+ # Total age score
88
+ total_age_score += age_contribution
89
+ # Total income score
90
+ total_income_score += income_contribution
91
+ # Total density score
92
+ total_density_score += density_contribution
93
+
94
+ # Store contribution data for this small area
95
+ small_area_contributions[smsv["id"]] = {
96
+ "density_score": density_contribution,
97
+ "age_score": age_contribution,
98
+ "income_score": income_contribution,
99
+ "total_score": area_score,
100
+ "geometry": geometry,
101
+ }
102
+
103
+ # # Calculate age score
104
+ # age_score = get_age_score(aggregated_age_distribution) * w_age
105
+ # total_score += age_score
106
+
107
+ # # Calculate income score
108
+ # income_score = get_income_score(aggregated_income_distribution) * w_income
109
+ # total_score += income_score
110
 
111
  return {"total_score": total_score,
112
+ "income_score": total_income_score,
113
+ "age_score": total_age_score,
114
+ "density_score": total_density_score,
115
  "age_data": aggregated_age_distribution,
116
+ "income_data": aggregated_income_distribution,
117
+ "small_area_contributions": small_area_contributions,
118
  }
119
 
120
+ def get_age_score(age_distribution):
121
  """
122
  Calculate a score based on age distribution.
123
 
 
155
  }
156
 
157
  # Calculate the weighted sum of the age distribution
158
+ weighted_sum = sum(age_distribution.get(age, 0) * weight for age, weight in age_weights.items())
159
 
160
  # Normalize the score by the total population
161
+ total_population = sum(age_distribution.values())
162
  if total_population == 0:
163
  return 0
164
 
165
  return weighted_sum / total_population
166
 
167
 
168
+ def get_income_score(income_distribution):
169
  """
170
  Calculate a score based on income distribution.
171
 
 
194
  }
195
 
196
  # Calculate the weighted sum of the income distribution
197
+ weighted_sum = sum(income_distribution.get(income_class, 0) * weight for income_class, weight in income_weights.items())
198
 
199
  # Normalize the score by the total population
200
+ total_population = sum(income_distribution.values())
201
  if total_population == 0:
202
  return 0
203