cdleong committed on
Commit
2280005
·
verified ·
1 Parent(s): 52494fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -10
app.py CHANGED
@@ -6,6 +6,7 @@ import io
6
  import contextlib
7
  import requests
8
  import random
 
9
 
10
  FORBIDDEN_NAMES ={"Judas",
11
  "Judas Iscariot"
@@ -87,25 +88,34 @@ download_file(
87
  # --- Load datasets ---
88
  ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))
89
 
90
- def load_ssa_names(min_year=0, max_year=9999):
 
 
 
91
  dfs = []
92
  for f in ssa_name_txt_files:
93
  year = int(f.stem.replace("yob", ""))
94
- if min_year <= year <= max_year:
95
- df = pd.read_csv(f, names=["name", "sex", "count"])
96
- df["year"] = year
97
- dfs.append(df)
98
- if not dfs:
99
- return pd.DataFrame(), pd.DataFrame()
100
-
101
  full_df = pd.concat(dfs, ignore_index=True)
 
 
 
 
 
 
 
 
 
102
  agg_df = (
103
- full_df
104
  .groupby(["name", "sex"], as_index=False)["count"]
105
  .sum()
106
  .sort_values("count", ascending=False)
107
  )
108
- return full_df, agg_df
 
109
 
110
 
111
 
 
6
  import contextlib
7
  import requests
8
  import random
9
+ from functools import lru_cache
10
 
11
  FORBIDDEN_NAMES ={"Judas",
12
  "Judas Iscariot"
 
88
  # --- Load datasets ---
89
  ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))
90
 
91
+
92
+
93
@lru_cache(maxsize=1)
def load_all_ssa_names():
    """Read every ``yob<year>.txt`` file into one DataFrame (cached).

    Each file listed in ``ssa_name_txt_files`` is parsed as a header-less CSV
    with the columns ``name``, ``sex`` and ``count``; the year encoded in the
    file name is added as a ``year`` column.

    Returns:
        A DataFrame with columns ``name``, ``sex``, ``count``, ``year``.
        When no data files are present the frame is empty but still has
        those four columns, so callers can filter on ``year`` safely.

    Note:
        The result is memoized, so every caller receives the *same* object.
        Treat it as read-only; derive new frames instead of mutating it.
    """
    columns = ["name", "sex", "count"]
    dfs = [
        pd.read_csv(f, names=columns).assign(
            year=int(f.stem.replace("yob", ""))
        )
        for f in ssa_name_txt_files
    ]

    if not dfs:
        # pd.concat([]) raises ValueError("No objects to concatenate");
        # return a correctly-shaped empty frame instead.
        return pd.DataFrame(
            {
                "name": pd.Series(dtype="object"),
                "sex": pd.Series(dtype="object"),
                "count": pd.Series(dtype="int64"),
                "year": pd.Series(dtype="int64"),
            }
        )

    full_df = pd.concat(dfs, ignore_index=True)
    return full_df
103
+
104
def load_ssa_names(min_year=0, max_year=9999):
    """Return the name rows for a year range plus per-name totals.

    Args:
        min_year: First year to include (inclusive).
        max_year: Last year to include (inclusive).

    Returns:
        A ``(filtered_df, agg_df)`` tuple. ``filtered_df`` holds the rows of
        the cached full dataset whose ``year`` lies in the range, re-indexed
        from 0. ``agg_df`` has one row per ``(name, sex)`` with ``count``
        summed over the range, sorted by ``count`` descending. If no row
        matches, two empty DataFrames are returned.
    """
    full_df = load_all_ssa_names()
    in_range = (full_df["year"] >= min_year) & (full_df["year"] <= max_year)
    # reset_index gives a contiguous 0-based index (as the frame had when it
    # was built per call with ignore_index=True) instead of leaking the row
    # labels of the shared cached frame.
    filtered_df = full_df[in_range].reset_index(drop=True)

    if filtered_df.empty:
        return pd.DataFrame(), pd.DataFrame()

    agg_df = (
        filtered_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values("count", ascending=False)
    )
    return filtered_df, agg_df
118
+
119
 
120
 
121