Spaces:
Running
Running
Alejandro Cremades
commited on
Commit
•
6ca52a1
1
Parent(s):
5fd242f
Black format notebook
Browse files- middleschool-cardlist.ipynb +83 -72
middleschool-cardlist.ipynb
CHANGED
@@ -112,31 +112,28 @@
|
|
112 |
" cards = json.loads(json_data.read())\n",
|
113 |
"\n",
|
114 |
"# Create a pandas DataFrame with all cards from all legal sets\n",
|
115 |
-
"column_names = [
|
116 |
"middleschool_df = pd.DataFrame(columns=column_names)\n",
|
117 |
"for card in cards:\n",
|
118 |
-
" oracle_id = card[
|
119 |
-
" name = card[
|
120 |
-
" lang_ja = [lang for lang in card[
|
121 |
-
" if lang['language'] == 'Japanese']\n",
|
122 |
" # Some cards do not have a Japanese name\n",
|
123 |
-
" if
|
124 |
-
" name_ja = lang_ja[0][
|
125 |
" else:\n",
|
126 |
" name_ja = None\n",
|
127 |
-
" temporary_df = pd.DataFrame(
|
128 |
-
"
|
129 |
-
"
|
130 |
-
" 'name_ja': [name_ja]\n",
|
131 |
-
" })\n",
|
132 |
" middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
|
133 |
"\n",
|
134 |
"# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
|
135 |
-
"middleschool_df = middleschool_df.sort_values(by=[
|
136 |
"# For cards with multiple occurrences, delete all rows except for the top one\n",
|
137 |
-
"middleschool_df = middleschool_df.drop_duplicates(subset=[
|
138 |
-
"print(middleschool_df.shape[0],
|
139 |
-
"print(
|
140 |
"print(middleschool_df.head())\n",
|
141 |
"print(middleschool_df.tail())\n"
|
142 |
]
|
@@ -146,7 +143,7 @@
|
|
146 |
"cell_type": "markdown",
|
147 |
"metadata": {},
|
148 |
"source": [
|
149 |
-
"Remove Japanese card names that are wrong on MTGJSON"
|
150 |
]
|
151 |
},
|
152 |
{
|
@@ -186,13 +183,23 @@
|
|
186 |
}
|
187 |
],
|
188 |
"source": [
|
189 |
-
"wrongnames = [
|
190 |
-
"
|
191 |
-
"
|
192 |
-
"
|
193 |
-
"
|
194 |
-
"
|
195 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
196 |
]
|
197 |
},
|
198 |
{
|
@@ -200,7 +207,7 @@
|
|
200 |
"cell_type": "markdown",
|
201 |
"metadata": {},
|
202 |
"source": [
|
203 |
-
"Find Japanese names for cards that were not released in Japanese in Middle School legal sets"
|
204 |
]
|
205 |
},
|
206 |
{
|
@@ -225,14 +232,15 @@
|
|
225 |
"source": [
|
226 |
"import time\n",
|
227 |
"from requests_html import HTMLSession\n",
|
|
|
228 |
"session = HTMLSession()\n",
|
229 |
"\n",
|
230 |
"\n",
|
231 |
"def find_japanese_name(name):\n",
|
232 |
-
" url =
|
233 |
" r = session.get(url)\n",
|
234 |
" # Find the text on the <title> element in the HTML document\n",
|
235 |
-
" title = r.html.find(
|
236 |
" # Find the position of the English card name within the title\n",
|
237 |
" idx = title.find(name)\n",
|
238 |
" # The Japanese name should be before the English name,\n",
|
@@ -241,7 +249,7 @@
|
|
241 |
" return None\n",
|
242 |
" # If the exact English card name can't be found, we look for a '/'\n",
|
243 |
" if idx == -1:\n",
|
244 |
-
" idx = title.find(
|
245 |
" # No '/' means no Japanese name\n",
|
246 |
" if idx == -1:\n",
|
247 |
" return None\n",
|
@@ -249,17 +257,18 @@
|
|
249 |
" name_ja = title[0:idx]\n",
|
250 |
" else:\n",
|
251 |
" # Take only the Japanese name from the title\n",
|
252 |
-
" name_ja = title[0:idx - 1]\n",
|
253 |
" return name_ja\n",
|
254 |
"\n",
|
255 |
"\n",
|
256 |
-
"english_only_cards = middleschool_df[middleschool_df[
|
257 |
-
"name_list = english_only_cards[
|
258 |
"for idx, name in enumerate(name_list):\n",
|
259 |
-
" middleschool_df.loc[
|
260 |
-
"
|
|
|
261 |
" # print(middleschool_df.loc[middleschool_df['name'] == name])\n",
|
262 |
-
" print(
|
263 |
" if idx % 80 == 79:\n",
|
264 |
" print()\n",
|
265 |
" time.sleep(1)\n"
|
@@ -270,7 +279,7 @@
|
|
270 |
"cell_type": "markdown",
|
271 |
"metadata": {},
|
272 |
"source": [
|
273 |
-
"Exclude all cards banned in Middle School"
|
274 |
]
|
275 |
},
|
276 |
{
|
@@ -289,44 +298,46 @@
|
|
289 |
}
|
290 |
],
|
291 |
"source": [
|
292 |
-
"banlist = [\
|
293 |
-
"
|
294 |
-
"
|
295 |
-
"
|
296 |
-
"
|
297 |
-
"
|
298 |
-
"
|
299 |
-
"
|
300 |
-
"
|
301 |
-
"
|
302 |
-
"
|
303 |
-
"
|
304 |
-
"
|
305 |
-
"
|
306 |
-
"
|
307 |
-
"
|
308 |
-
"
|
309 |
-
"
|
310 |
-
"
|
311 |
-
"
|
312 |
-
"
|
313 |
-
"
|
314 |
-
"
|
315 |
-
"
|
316 |
-
"
|
317 |
-
"
|
|
|
|
|
318 |
"# Find the rows with the banned cards\n",
|
319 |
-
"banned_df = middleschool_df[
|
320 |
-
" middleschool_df.name.tolist()).isin(banlist).any(axis=1).values
|
321 |
-
"
|
|
|
322 |
"# Append the banned cards to the main Middle School DataFrame,\n",
|
323 |
"# then remove any rows that appear twice,\n",
|
324 |
"# effectively leaving only the legal cards\n",
|
325 |
-
"middleschool_df = pd.concat(\n",
|
326 |
-
"
|
327 |
-
"print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
|
328 |
"middleschool_df = middleschool_df.reset_index(drop=True)\n",
|
329 |
-
"middleschool_df = middleschool_df[[
|
330 |
]
|
331 |
},
|
332 |
{
|
@@ -343,8 +354,8 @@
|
|
343 |
"metadata": {},
|
344 |
"outputs": [],
|
345 |
"source": [
|
346 |
-
"middleschool_df.to_csv(
|
347 |
-
"middleschool_df.to_json(
|
348 |
]
|
349 |
},
|
350 |
{
|
@@ -352,7 +363,7 @@
|
|
352 |
"cell_type": "markdown",
|
353 |
"metadata": {},
|
354 |
"source": [
|
355 |
-
"Feel free to delete everything in the `data` directory after you are done"
|
356 |
]
|
357 |
}
|
358 |
],
|
@@ -372,7 +383,7 @@
|
|
372 |
"name": "python",
|
373 |
"nbconvert_exporter": "python",
|
374 |
"pygments_lexer": "ipython3",
|
375 |
-
"version": "3.11.
|
376 |
},
|
377 |
"orig_nbformat": 4,
|
378 |
"vscode": {
|
|
|
112 |
" cards = json.loads(json_data.read())\n",
|
113 |
"\n",
|
114 |
"# Create a pandas DataFrame with all cards from all legal sets\n",
|
115 |
+
"column_names = [\"oracle_id\", \"name\", \"name_ja\"]\n",
|
116 |
"middleschool_df = pd.DataFrame(columns=column_names)\n",
|
117 |
"for card in cards:\n",
|
118 |
+
" oracle_id = card[\"identifiers\"][\"scryfallOracleId\"]\n",
|
119 |
+
" name = card[\"name\"]\n",
|
120 |
+
" lang_ja = [lang for lang in card[\"foreignData\"] if lang[\"language\"] == \"Japanese\"]\n",
|
|
|
121 |
" # Some cards do not have a Japanese name\n",
|
122 |
+
" if len(lang_ja) > 0:\n",
|
123 |
+
" name_ja = lang_ja[0][\"name\"]\n",
|
124 |
" else:\n",
|
125 |
" name_ja = None\n",
|
126 |
+
" temporary_df = pd.DataFrame(\n",
|
127 |
+
" {\"oracle_id\": [oracle_id], \"name\": [name], \"name_ja\": [name_ja]}\n",
|
128 |
+
" )\n",
|
|
|
|
|
129 |
" middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
|
130 |
"\n",
|
131 |
"# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
|
132 |
+
"middleschool_df = middleschool_df.sort_values(by=[\"name\", \"name_ja\"])\n",
|
133 |
"# For cards with multiple occurrences, delete all rows except for the top one\n",
|
134 |
+
"middleschool_df = middleschool_df.drop_duplicates(subset=[\"oracle_id\"])\n",
|
135 |
+
"print(middleschool_df.shape[0], \"cards found\")\n",
|
136 |
+
"print(\"These are the first and last 5 cards\")\n",
|
137 |
"print(middleschool_df.head())\n",
|
138 |
"print(middleschool_df.tail())\n"
|
139 |
]
|
|
|
143 |
"cell_type": "markdown",
|
144 |
"metadata": {},
|
145 |
"source": [
|
146 |
+
"Remove Japanese card names that are wrong on MTGJSON\n"
|
147 |
]
|
148 |
},
|
149 |
{
|
|
|
183 |
}
|
184 |
],
|
185 |
"source": [
|
186 |
+
"wrongnames = [\n",
|
187 |
+
" \"Aether Barrier\",\n",
|
188 |
+
" \"Aether Burst\",\n",
|
189 |
+
" \"Aether Charge\",\n",
|
190 |
+
" \"Aether Flash\",\n",
|
191 |
+
" \"Aether Mutation\",\n",
|
192 |
+
" \"Aether Sting\",\n",
|
193 |
+
" \"Aether Storm\",\n",
|
194 |
+
" \"Aether Tide\",\n",
|
195 |
+
" \"Tainted Aether\",\n",
|
196 |
+
" \"Tar Pit Warrior\",\n",
|
197 |
+
"]\n",
|
198 |
+
"print(\"Before:\")\n",
|
199 |
+
"print(middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames)])\n",
|
200 |
+
"middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames), \"name_ja\"] = None\n",
|
201 |
+
"print(\"After:\")\n",
|
202 |
+
"print(middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames)])\n"
|
203 |
]
|
204 |
},
|
205 |
{
|
|
|
207 |
"cell_type": "markdown",
|
208 |
"metadata": {},
|
209 |
"source": [
|
210 |
+
"Find Japanese names for cards that were not released in Japanese in Middle School legal sets\n"
|
211 |
]
|
212 |
},
|
213 |
{
|
|
|
232 |
"source": [
|
233 |
"import time\n",
|
234 |
"from requests_html import HTMLSession\n",
|
235 |
+
"\n",
|
236 |
"session = HTMLSession()\n",
|
237 |
"\n",
|
238 |
"\n",
|
239 |
"def find_japanese_name(name):\n",
|
240 |
+
" url = \"http://whisper.wisdom-guild.net/card/\" + name + \"/\"\n",
|
241 |
" r = session.get(url)\n",
|
242 |
" # Find the text on the <title> element in the HTML document\n",
|
243 |
+
" title = r.html.find(\"title\")[0].text\n",
|
244 |
" # Find the position of the English card name within the title\n",
|
245 |
" idx = title.find(name)\n",
|
246 |
" # The Japanese name should be before the English name,\n",
|
|
|
249 |
" return None\n",
|
250 |
" # If the exact English card name can't be found, we look for a '/'\n",
|
251 |
" if idx == -1:\n",
|
252 |
+
" idx = title.find(\"/\")\n",
|
253 |
" # No '/' means no Japanese name\n",
|
254 |
" if idx == -1:\n",
|
255 |
" return None\n",
|
|
|
257 |
" name_ja = title[0:idx]\n",
|
258 |
" else:\n",
|
259 |
" # Take only the Japanese name from the title\n",
|
260 |
+
" name_ja = title[0 : idx - 1]\n",
|
261 |
" return name_ja\n",
|
262 |
"\n",
|
263 |
"\n",
|
264 |
+
"english_only_cards = middleschool_df[middleschool_df[\"name_ja\"].isnull()]\n",
|
265 |
+
"name_list = english_only_cards[\"name\"].to_list()\n",
|
266 |
"for idx, name in enumerate(name_list):\n",
|
267 |
+
" middleschool_df.loc[\n",
|
268 |
+
" middleschool_df[\"name\"] == name, \"name_ja\"\n",
|
269 |
+
" ] = find_japanese_name(name)\n",
|
270 |
" # print(middleschool_df.loc[middleschool_df['name'] == name])\n",
|
271 |
+
" print(\".\", end=\"\")\n",
|
272 |
" if idx % 80 == 79:\n",
|
273 |
" print()\n",
|
274 |
" time.sleep(1)\n"
|
|
|
279 |
"cell_type": "markdown",
|
280 |
"metadata": {},
|
281 |
"source": [
|
282 |
+
"Exclude all cards banned in Middle School\n"
|
283 |
]
|
284 |
},
|
285 |
{
|
|
|
298 |
}
|
299 |
],
|
300 |
"source": [
|
301 |
+
"banlist = [\n",
|
302 |
+
" \"Amulet of Quoz\",\n",
|
303 |
+
" \"Balance\",\n",
|
304 |
+
" \"Brainstorm\",\n",
|
305 |
+
" \"Bronze Tablet\",\n",
|
306 |
+
" \"Channel\",\n",
|
307 |
+
" \"Dark Ritual\",\n",
|
308 |
+
" \"Demonic Consultation\",\n",
|
309 |
+
" \"Flash\",\n",
|
310 |
+
" \"Goblin Recruiter\",\n",
|
311 |
+
" \"Imperial Seal\",\n",
|
312 |
+
" \"Jeweled Bird\",\n",
|
313 |
+
" \"Mana Crypt\",\n",
|
314 |
+
" \"Mana Vault\",\n",
|
315 |
+
" \"Memory Jar\",\n",
|
316 |
+
" \"Mind's Desire\",\n",
|
317 |
+
" \"Mind Twist\",\n",
|
318 |
+
" \"Rebirth\",\n",
|
319 |
+
" \"Strip Mine\",\n",
|
320 |
+
" \"Tempest Efreet\",\n",
|
321 |
+
" \"Timmerian Fiends\",\n",
|
322 |
+
" \"Tolarian Academy\",\n",
|
323 |
+
" \"Vampiric Tutor\",\n",
|
324 |
+
" \"Windfall\",\n",
|
325 |
+
" \"Yawgmoth's Bargain\",\n",
|
326 |
+
" \"Yawgmoth's Will\",\n",
|
327 |
+
"]\n",
|
328 |
+
"print(\"Cards legal by set:\", middleschool_df.shape[0])\n",
|
329 |
"# Find the rows with the banned cards\n",
|
330 |
+
"banned_df = middleschool_df[\n",
|
331 |
+
" pd.DataFrame(middleschool_df.name.tolist()).isin(banlist).any(axis=1).values\n",
|
332 |
+
"]\n",
|
333 |
+
"print(\"Banned cards:\", banned_df.shape[0])\n",
|
334 |
"# Append the banned cards to the main Middle School DataFrame,\n",
|
335 |
"# then remove any rows that appear twice,\n",
|
336 |
"# effectively leaving only the legal cards\n",
|
337 |
+
"middleschool_df = pd.concat([middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
|
338 |
+
"print(\"Cards legal by set and not banned:\", middleschool_df.shape[0])\n",
|
|
|
339 |
"middleschool_df = middleschool_df.reset_index(drop=True)\n",
|
340 |
+
"middleschool_df = middleschool_df[[\"oracle_id\", \"name\", \"name_ja\"]]\n"
|
341 |
]
|
342 |
},
|
343 |
{
|
|
|
354 |
"metadata": {},
|
355 |
"outputs": [],
|
356 |
"source": [
|
357 |
+
"middleschool_df.to_csv(\"output/middleschool.csv\")\n",
|
358 |
+
"middleschool_df.to_json(\"output/middleschool.json\")\n"
|
359 |
]
|
360 |
},
|
361 |
{
|
|
|
363 |
"cell_type": "markdown",
|
364 |
"metadata": {},
|
365 |
"source": [
|
366 |
+
"Feel free to delete everything in the `data` directory after you are done\n"
|
367 |
]
|
368 |
}
|
369 |
],
|
|
|
383 |
"name": "python",
|
384 |
"nbconvert_exporter": "python",
|
385 |
"pygments_lexer": "ipython3",
|
386 |
+
"version": "3.11.4"
|
387 |
},
|
388 |
"orig_nbformat": 4,
|
389 |
"vscode": {
|