Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import io
|
|
| 6 |
import contextlib
|
| 7 |
import requests
|
| 8 |
import random
|
|
|
|
| 9 |
|
| 10 |
FORBIDDEN_NAMES ={"Judas",
|
| 11 |
"Judas Iscariot"
|
|
@@ -87,25 +88,34 @@ download_file(
|
|
| 87 |
# --- Load datasets ---
|
| 88 |
ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))
|
| 89 |
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
| 91 |
dfs = []
|
| 92 |
for f in ssa_name_txt_files:
|
| 93 |
year = int(f.stem.replace("yob", ""))
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
dfs.append(df)
|
| 98 |
-
if not dfs:
|
| 99 |
-
return pd.DataFrame(), pd.DataFrame()
|
| 100 |
-
|
| 101 |
full_df = pd.concat(dfs, ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
agg_df = (
|
| 103 |
-
|
| 104 |
.groupby(["name", "sex"], as_index=False)["count"]
|
| 105 |
.sum()
|
| 106 |
.sort_values("count", ascending=False)
|
| 107 |
)
|
| 108 |
-
return
|
|
|
|
| 109 |
|
| 110 |
|
| 111 |
|
|
|
|
| 6 |
import contextlib
|
| 7 |
import requests
|
| 8 |
import random
|
| 9 |
+
from functools import lru_cache
|
| 10 |
|
| 11 |
FORBIDDEN_NAMES ={"Judas",
|
| 12 |
"Judas Iscariot"
|
|
|
|
| 88 |
# --- Load datasets ---
# Per-year name files found in the working directory at import time.
ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))


@lru_cache(maxsize=1)
def load_all_ssa_names():
    """Read every discovered ``yob*.txt`` file into one combined DataFrame.

    Each file is parsed as header-less CSV with the columns ``name``,
    ``sex`` and ``count``; a ``year`` column is added from the digits that
    follow ``yob`` in the file name. The result is memoized, so the files
    are read only once per process and every call returns the same object.

    Returns:
        A DataFrame with columns ``name``, ``sex``, ``count`` and ``year``.
        When no data files were found, an empty DataFrame with those same
        columns is returned.
    """
    columns = ["name", "sex", "count"]
    dfs = []
    for f in ssa_name_txt_files:
        year = int(f.stem.replace("yob", ""))
        df = pd.read_csv(f, names=columns)
        df["year"] = year
        dfs.append(df)

    if not dfs:
        # pd.concat raises ValueError on an empty list. Return an empty frame
        # that still has the expected columns so a caller filtering on
        # "year" gets an empty result instead of an exception.
        return pd.DataFrame(columns=[*columns, "year"])

    return pd.concat(dfs, ignore_index=True)
|
| 103 |
+
|
| 104 |
+
def load_ssa_names(min_year=0, max_year=9999):
    """Return the name rows within a year range plus their per-name totals.

    Args:
        min_year: Lowest ``year`` value to keep (inclusive).
        max_year: Highest ``year`` value to keep (inclusive).

    Returns:
        A ``(filtered_df, agg_df)`` tuple. ``filtered_df`` holds the rows of
        the combined dataset whose ``year`` lies in the requested range.
        ``agg_df`` sums ``count`` per ``(name, sex)`` pair over those rows,
        sorted by ``count`` in descending order. If no rows fall in the
        range, two empty DataFrames are returned.
    """
    all_rows = load_all_ssa_names()

    # Both bounds are inclusive, matching a ">= min_year and <= max_year" test.
    in_range = all_rows["year"].between(min_year, max_year)
    filtered_df = all_rows[in_range]

    if filtered_df.empty:
        return pd.DataFrame(), pd.DataFrame()

    totals = filtered_df.groupby(["name", "sex"], as_index=False)["count"].sum()
    agg_df = totals.sort_values("count", ascending=False)
    return filtered_df, agg_df
|
| 118 |
+
|
| 119 |
|
| 120 |
|
| 121 |
|