Alejandro Cremades committed on
Commit
a006abf
·
unverified ·
2 Parent(s): 5fd242f 6ca52a1

Merge pull request #22 from alecrem/chore/no-ref/black-format-notebook

Browse files
Files changed (1) hide show
  1. middleschool-cardlist.ipynb +83 -72
middleschool-cardlist.ipynb CHANGED
@@ -112,31 +112,28 @@
112
  " cards = json.loads(json_data.read())\n",
113
  "\n",
114
  "# Create a pandas DataFrame with all cards from all legal sets\n",
115
- "column_names = ['oracle_id', 'name', 'name_ja']\n",
116
  "middleschool_df = pd.DataFrame(columns=column_names)\n",
117
  "for card in cards:\n",
118
- " oracle_id = card['identifiers']['scryfallOracleId']\n",
119
- " name = card['name']\n",
120
- " lang_ja = [lang for lang in card['foreignData']\n",
121
- " if lang['language'] == 'Japanese']\n",
122
  " # Some cards do not have a Japanese name\n",
123
- " if (len(lang_ja) > 0):\n",
124
- " name_ja = lang_ja[0]['name']\n",
125
  " else:\n",
126
  " name_ja = None\n",
127
- " temporary_df = pd.DataFrame({\n",
128
- " 'oracle_id': [oracle_id],\n",
129
- " 'name': [name],\n",
130
- " 'name_ja': [name_ja]\n",
131
- " })\n",
132
  " middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
133
  "\n",
134
  "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
135
- "middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
136
  "# For cards with multiple occurrences, delete all rows except for the top one\n",
137
- "middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
138
- "print(middleschool_df.shape[0], 'cards found')\n",
139
- "print('These are the first and last 5 cards')\n",
140
  "print(middleschool_df.head())\n",
141
  "print(middleschool_df.tail())\n"
142
  ]
@@ -146,7 +143,7 @@
146
  "cell_type": "markdown",
147
  "metadata": {},
148
  "source": [
149
- "Remove Japanese card names that are wrong on MTGJSON"
150
  ]
151
  },
152
  {
@@ -186,13 +183,23 @@
186
  }
187
  ],
188
  "source": [
189
- "wrongnames = ['Aether Barrier', 'Aether Burst', 'Aether Charge', 'Aether Flash', 'Aether Mutation',\n",
190
- " 'Aether Sting', 'Aether Storm', 'Aether Tide', 'Tainted Aether', 'Tar Pit Warrior']\n",
191
- "print('Before:')\n",
192
- "print(middleschool_df.loc[middleschool_df['name'].isin(wrongnames)])\n",
193
- "middleschool_df.loc[middleschool_df['name'].isin(wrongnames), 'name_ja'] = None\n",
194
- "print('After:')\n",
195
- "print(middleschool_df.loc[middleschool_df['name'].isin(wrongnames)])\n"
 
 
 
 
 
 
 
 
 
 
196
  ]
197
  },
198
  {
@@ -200,7 +207,7 @@
200
  "cell_type": "markdown",
201
  "metadata": {},
202
  "source": [
203
- "Find Japanese names for cards that were not released in Japanese in Middle School legal sets"
204
  ]
205
  },
206
  {
@@ -225,14 +232,15 @@
225
  "source": [
226
  "import time\n",
227
  "from requests_html import HTMLSession\n",
 
228
  "session = HTMLSession()\n",
229
  "\n",
230
  "\n",
231
  "def find_japanese_name(name):\n",
232
- " url = 'http://whisper.wisdom-guild.net/card/' + name + '/'\n",
233
  " r = session.get(url)\n",
234
  " # Find the text on the <title> element in the HTML document\n",
235
- " title = r.html.find('title')[0].text\n",
236
  " # Find the position of the English card name within the title\n",
237
  " idx = title.find(name)\n",
238
  " # The Japanese name should be before the English name,\n",
@@ -241,7 +249,7 @@
241
  " return None\n",
242
  " # If the exact English card name can't be found, we look for a '/'\n",
243
  " if idx == -1:\n",
244
- " idx = title.find('/')\n",
245
  " # No '/' means no Japanese name\n",
246
  " if idx == -1:\n",
247
  " return None\n",
@@ -249,17 +257,18 @@
249
  " name_ja = title[0:idx]\n",
250
  " else:\n",
251
  " # Take only the Japanese name from the title\n",
252
- " name_ja = title[0:idx - 1]\n",
253
  " return name_ja\n",
254
  "\n",
255
  "\n",
256
- "english_only_cards = middleschool_df[middleschool_df['name_ja'].isnull()]\n",
257
- "name_list = english_only_cards['name'].to_list()\n",
258
  "for idx, name in enumerate(name_list):\n",
259
- " middleschool_df.loc[middleschool_df['name'] ==\n",
260
- " name, 'name_ja'] = find_japanese_name(name)\n",
 
261
  " # print(middleschool_df.loc[middleschool_df['name'] == name])\n",
262
- " print('.', end='')\n",
263
  " if idx % 80 == 79:\n",
264
  " print()\n",
265
  " time.sleep(1)\n"
@@ -270,7 +279,7 @@
270
  "cell_type": "markdown",
271
  "metadata": {},
272
  "source": [
273
- "Exclude all cards banned in Middle School"
274
  ]
275
  },
276
  {
@@ -289,44 +298,46 @@
289
  }
290
  ],
291
  "source": [
292
- "banlist = [\"Amulet of Quoz\",\n",
293
- " \"Balance\",\n",
294
- " \"Brainstorm\",\n",
295
- " \"Bronze Tablet\",\n",
296
- " \"Channel\",\n",
297
- " \"Dark Ritual\",\n",
298
- " \"Demonic Consultation\",\n",
299
- " \"Flash\",\n",
300
- " \"Goblin Recruiter\",\n",
301
- " \"Imperial Seal\",\n",
302
- " \"Jeweled Bird\",\n",
303
- " \"Mana Crypt\",\n",
304
- " \"Mana Vault\",\n",
305
- " \"Memory Jar\",\n",
306
- " \"Mind's Desire\",\n",
307
- " \"Mind Twist\",\n",
308
- " \"Rebirth\",\n",
309
- " \"Strip Mine\",\n",
310
- " \"Tempest Efreet\",\n",
311
- " \"Timmerian Fiends\",\n",
312
- " \"Tolarian Academy\",\n",
313
- " \"Vampiric Tutor\",\n",
314
- " \"Windfall\",\n",
315
- " \"Yawgmoth's Bargain\",\n",
316
- " \"Yawgmoth's Will\"]\n",
317
- "print('Cards legal by set:', middleschool_df.shape[0])\n",
 
 
318
  "# Find the rows with the banned cards\n",
319
- "banned_df = middleschool_df[pd.DataFrame(\n",
320
- " middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
321
- "print('Banned cards:', banned_df.shape[0])\n",
 
322
  "# Append the banned cards to the main Middle School DataFrame,\n",
323
  "# then remove any rows that appear twice,\n",
324
  "# effectively leaving only the legal cards\n",
325
- "middleschool_df = pd.concat(\n",
326
- " [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
327
- "print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
328
  "middleschool_df = middleschool_df.reset_index(drop=True)\n",
329
- "middleschool_df = middleschool_df[['oracle_id', 'name', 'name_ja']]\n"
330
  ]
331
  },
332
  {
@@ -343,8 +354,8 @@
343
  "metadata": {},
344
  "outputs": [],
345
  "source": [
346
- "middleschool_df.to_csv('output/middleschool.csv')\n",
347
- "middleschool_df.to_json('output/middleschool.json')\n"
348
  ]
349
  },
350
  {
@@ -352,7 +363,7 @@
352
  "cell_type": "markdown",
353
  "metadata": {},
354
  "source": [
355
- "Feel free to delete everything in the `data` directory after you are done"
356
  ]
357
  }
358
  ],
@@ -372,7 +383,7 @@
372
  "name": "python",
373
  "nbconvert_exporter": "python",
374
  "pygments_lexer": "ipython3",
375
- "version": "3.11.3"
376
  },
377
  "orig_nbformat": 4,
378
  "vscode": {
 
112
  " cards = json.loads(json_data.read())\n",
113
  "\n",
114
  "# Create a pandas DataFrame with all cards from all legal sets\n",
115
+ "column_names = [\"oracle_id\", \"name\", \"name_ja\"]\n",
116
  "middleschool_df = pd.DataFrame(columns=column_names)\n",
117
  "for card in cards:\n",
118
+ " oracle_id = card[\"identifiers\"][\"scryfallOracleId\"]\n",
119
+ " name = card[\"name\"]\n",
120
+ " lang_ja = [lang for lang in card[\"foreignData\"] if lang[\"language\"] == \"Japanese\"]\n",
 
121
  " # Some cards do not have a Japanese name\n",
122
+ " if len(lang_ja) > 0:\n",
123
+ " name_ja = lang_ja[0][\"name\"]\n",
124
  " else:\n",
125
  " name_ja = None\n",
126
+ " temporary_df = pd.DataFrame(\n",
127
+ " {\"oracle_id\": [oracle_id], \"name\": [name], \"name_ja\": [name_ja]}\n",
128
+ " )\n",
 
 
129
  " middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
130
  "\n",
131
  "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
132
+ "middleschool_df = middleschool_df.sort_values(by=[\"name\", \"name_ja\"])\n",
133
  "# For cards with multiple occurrences, delete all rows except for the top one\n",
134
+ "middleschool_df = middleschool_df.drop_duplicates(subset=[\"oracle_id\"])\n",
135
+ "print(middleschool_df.shape[0], \"cards found\")\n",
136
+ "print(\"These are the first and last 5 cards\")\n",
137
  "print(middleschool_df.head())\n",
138
  "print(middleschool_df.tail())\n"
139
  ]
 
143
  "cell_type": "markdown",
144
  "metadata": {},
145
  "source": [
146
+ "Remove Japanese card names that are wrong on MTGJSON\n"
147
  ]
148
  },
149
  {
 
183
  }
184
  ],
185
  "source": [
186
+ "wrongnames = [\n",
187
+ " \"Aether Barrier\",\n",
188
+ " \"Aether Burst\",\n",
189
+ " \"Aether Charge\",\n",
190
+ " \"Aether Flash\",\n",
191
+ " \"Aether Mutation\",\n",
192
+ " \"Aether Sting\",\n",
193
+ " \"Aether Storm\",\n",
194
+ " \"Aether Tide\",\n",
195
+ " \"Tainted Aether\",\n",
196
+ " \"Tar Pit Warrior\",\n",
197
+ "]\n",
198
+ "print(\"Before:\")\n",
199
+ "print(middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames)])\n",
200
+ "middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames), \"name_ja\"] = None\n",
201
+ "print(\"After:\")\n",
202
+ "print(middleschool_df.loc[middleschool_df[\"name\"].isin(wrongnames)])\n"
203
  ]
204
  },
205
  {
 
207
  "cell_type": "markdown",
208
  "metadata": {},
209
  "source": [
210
+ "Find Japanese names for cards that were not released in Japanese in Middle School legal sets\n"
211
  ]
212
  },
213
  {
 
232
  "source": [
233
  "import time\n",
234
  "from requests_html import HTMLSession\n",
235
+ "\n",
236
  "session = HTMLSession()\n",
237
  "\n",
238
  "\n",
239
  "def find_japanese_name(name):\n",
240
+ " url = \"http://whisper.wisdom-guild.net/card/\" + name + \"/\"\n",
241
  " r = session.get(url)\n",
242
  " # Find the text on the <title> element in the HTML document\n",
243
+ " title = r.html.find(\"title\")[0].text\n",
244
  " # Find the position of the English card name within the title\n",
245
  " idx = title.find(name)\n",
246
  " # The Japanese name should be before the English name,\n",
 
249
  " return None\n",
250
  " # If the exact English card name can't be found, we look for a '/'\n",
251
  " if idx == -1:\n",
252
+ " idx = title.find(\"/\")\n",
253
  " # No '/' means no Japanese name\n",
254
  " if idx == -1:\n",
255
  " return None\n",
 
257
  " name_ja = title[0:idx]\n",
258
  " else:\n",
259
  " # Take only the Japanese name from the title\n",
260
+ " name_ja = title[0 : idx - 1]\n",
261
  " return name_ja\n",
262
  "\n",
263
  "\n",
264
+ "english_only_cards = middleschool_df[middleschool_df[\"name_ja\"].isnull()]\n",
265
+ "name_list = english_only_cards[\"name\"].to_list()\n",
266
  "for idx, name in enumerate(name_list):\n",
267
+ " middleschool_df.loc[\n",
268
+ " middleschool_df[\"name\"] == name, \"name_ja\"\n",
269
+ " ] = find_japanese_name(name)\n",
270
  " # print(middleschool_df.loc[middleschool_df['name'] == name])\n",
271
+ " print(\".\", end=\"\")\n",
272
  " if idx % 80 == 79:\n",
273
  " print()\n",
274
  " time.sleep(1)\n"
 
279
  "cell_type": "markdown",
280
  "metadata": {},
281
  "source": [
282
+ "Exclude all cards banned in Middle School\n"
283
  ]
284
  },
285
  {
 
298
  }
299
  ],
300
  "source": [
301
+ "banlist = [\n",
302
+ " \"Amulet of Quoz\",\n",
303
+ " \"Balance\",\n",
304
+ " \"Brainstorm\",\n",
305
+ " \"Bronze Tablet\",\n",
306
+ " \"Channel\",\n",
307
+ " \"Dark Ritual\",\n",
308
+ " \"Demonic Consultation\",\n",
309
+ " \"Flash\",\n",
310
+ " \"Goblin Recruiter\",\n",
311
+ " \"Imperial Seal\",\n",
312
+ " \"Jeweled Bird\",\n",
313
+ " \"Mana Crypt\",\n",
314
+ " \"Mana Vault\",\n",
315
+ " \"Memory Jar\",\n",
316
+ " \"Mind's Desire\",\n",
317
+ " \"Mind Twist\",\n",
318
+ " \"Rebirth\",\n",
319
+ " \"Strip Mine\",\n",
320
+ " \"Tempest Efreet\",\n",
321
+ " \"Timmerian Fiends\",\n",
322
+ " \"Tolarian Academy\",\n",
323
+ " \"Vampiric Tutor\",\n",
324
+ " \"Windfall\",\n",
325
+ " \"Yawgmoth's Bargain\",\n",
326
+ " \"Yawgmoth's Will\",\n",
327
+ "]\n",
328
+ "print(\"Cards legal by set:\", middleschool_df.shape[0])\n",
329
  "# Find the rows with the banned cards\n",
330
+ "banned_df = middleschool_df[\n",
331
+ " pd.DataFrame(middleschool_df.name.tolist()).isin(banlist).any(axis=1).values\n",
332
+ "]\n",
333
+ "print(\"Banned cards:\", banned_df.shape[0])\n",
334
  "# Append the banned cards to the main Middle School DataFrame,\n",
335
  "# then remove any rows that appear twice,\n",
336
  "# effectively leaving only the legal cards\n",
337
+ "middleschool_df = pd.concat([middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
338
+ "print(\"Cards legal by set and not banned:\", middleschool_df.shape[0])\n",
 
339
  "middleschool_df = middleschool_df.reset_index(drop=True)\n",
340
+ "middleschool_df = middleschool_df[[\"oracle_id\", \"name\", \"name_ja\"]]\n"
341
  ]
342
  },
343
  {
 
354
  "metadata": {},
355
  "outputs": [],
356
  "source": [
357
+ "middleschool_df.to_csv(\"output/middleschool.csv\")\n",
358
+ "middleschool_df.to_json(\"output/middleschool.json\")\n"
359
  ]
360
  },
361
  {
 
363
  "cell_type": "markdown",
364
  "metadata": {},
365
  "source": [
366
+ "Feel free to delete everything in the `data` directory after you are done\n"
367
  ]
368
  }
369
  ],
 
383
  "name": "python",
384
  "nbconvert_exporter": "python",
385
  "pygments_lexer": "ipython3",
386
+ "version": "3.11.4"
387
  },
388
  "orig_nbformat": 4,
389
  "vscode": {