Alejandro Cremades committed on
Commit
dd85b1e
1 Parent(s): 106cfa9

Bonus, comments

Browse files
Files changed (1) hide show
  1. middleschool-cardlist.ipynb +27 -9
middleschool-cardlist.ipynb CHANGED
@@ -44,6 +44,7 @@
44
  " 'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
45
  " 'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
46
  "for set in setlist:\n",
 
47
  " command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
48
  " set + '\\\".cards\\' > data/set_' + set + '.json'\n",
49
  " !{command}\n"
@@ -75,19 +76,15 @@
75
  },
76
  {
77
  "cell_type": "code",
78
- "execution_count": 18,
79
  "metadata": {},
80
  "outputs": [
81
  {
82
  "name": "stdout",
83
  "output_type": "stream",
84
  "text": [
85
- " oracle_id name name_ja\n",
86
- "0 5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982 Arc Lightning 弧状の稲妻\n",
87
- "0 5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982 Arc Lightning None\n",
88
- " oracle_id name name_ja\n",
89
- "0 5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982 Arc Lightning 弧状の稲妻\n",
90
  "5800 cards found\n",
 
91
  " oracle_id name name_ja\n",
92
  "0 8adbba6e-03ef-4278-aec5-8a4496b377a8 Abandon Hope 断念\n",
93
  "0 5a70ccfa-d12d-4e62-a1a4-f05cda2fd442 Abandoned Outpost 見捨てられた前哨地\n",
@@ -110,6 +107,7 @@
110
  "with open(\"data/middleschool.json\") as json_data:\n",
111
  " cards = json.loads(json_data.read())\n",
112
  "\n",
 
113
  "column_names = ['oracle_id', 'name', 'name_ja']\n",
114
  "middleschool_df = pd.DataFrame(columns=column_names)\n",
115
  "for card in cards:\n",
@@ -117,6 +115,7 @@
117
  " name = card['name']\n",
118
  " lang_ja = [lang for lang in card['foreignData']\n",
119
  " if lang['language'] == 'Japanese']\n",
 
120
  " if (len(lang_ja) > 0):\n",
121
  " name_ja = lang_ja[0]['name']\n",
122
  " else:\n",
@@ -128,20 +127,22 @@
128
  " })\n",
129
  " middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
130
  "\n",
 
131
  "middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
132
- "print(middleschool_df[middleschool_df['name'] == 'Arc Lightning'])\n",
133
  "middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
134
- "print(middleschool_df[middleschool_df['name'] == 'Arc Lightning'])\n",
135
  "print(middleschool_df.shape[0], 'cards found')\n",
 
136
  "print(middleschool_df.head())\n",
137
  "print(middleschool_df.tail())\n"
138
  ]
139
  },
140
  {
 
141
  "cell_type": "markdown",
142
  "metadata": {},
143
  "source": [
144
- "Exclude all cards banned in Middle School\n"
145
  ]
146
  },
147
  {
@@ -169,11 +170,16 @@
169
  "def find_japanese_name(name):\n",
170
  " url = 'http://whisper.wisdom-guild.net/search.php?q=' + name\n",
171
  " r = session.get(url)\n",
 
172
  " title = r.html.find('title')[0].text\n",
 
173
  " idx = title.find(name)\n",
 
 
174
  " if idx == 0:\n",
175
  " return None\n",
176
  " else:\n",
 
177
  " name_ja = title[0:idx - 1]\n",
178
  " return name_ja\n",
179
  "\n",
@@ -192,6 +198,14 @@
192
  "print(name, '→', find_japanese_name(name))\n"
193
  ]
194
  },
 
 
 
 
 
 
 
 
195
  {
196
  "cell_type": "code",
197
  "execution_count": 19,
@@ -235,9 +249,13 @@
235
  " \"Yawgmoth's Bargain\",\n",
236
  " \"Yawgmoth's Will\"]\n",
237
  "print('Cards legal by set:', middleschool_df.shape[0])\n",
 
238
  "banned_df = middleschool_df[pd.DataFrame(\n",
239
  " middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
240
  "print('Banned cards:', banned_df.shape[0])\n",
 
 
 
241
  "middleschool_df = pd.concat(\n",
242
  " [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
243
  "print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
 
44
  " 'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
45
  " 'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
46
  "for set in setlist:\n",
47
+ " # Write a separate JSON document for each Middle School legal set\n",
48
  " command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
49
  " set + '\\\".cards\\' > data/set_' + set + '.json'\n",
50
  " !{command}\n"
 
76
  },
77
  {
78
  "cell_type": "code",
79
+ "execution_count": 56,
80
  "metadata": {},
81
  "outputs": [
82
  {
83
  "name": "stdout",
84
  "output_type": "stream",
85
  "text": [
 
 
 
 
 
86
  "5800 cards found\n",
87
+ "These are the first and last 5 cards\n",
88
  " oracle_id name name_ja\n",
89
  "0 8adbba6e-03ef-4278-aec5-8a4496b377a8 Abandon Hope 断念\n",
90
  "0 5a70ccfa-d12d-4e62-a1a4-f05cda2fd442 Abandoned Outpost 見捨てられた前哨地\n",
 
107
  "with open(\"data/middleschool.json\") as json_data:\n",
108
  " cards = json.loads(json_data.read())\n",
109
  "\n",
110
+ "# Create a pandas DataFrame with all cards from all legal sets\n",
111
  "column_names = ['oracle_id', 'name', 'name_ja']\n",
112
  "middleschool_df = pd.DataFrame(columns=column_names)\n",
113
  "for card in cards:\n",
 
115
  " name = card['name']\n",
116
  " lang_ja = [lang for lang in card['foreignData']\n",
117
  " if lang['language'] == 'Japanese']\n",
118
+ " # Some cards do not have a Japanese name\n",
119
  " if (len(lang_ja) > 0):\n",
120
  " name_ja = lang_ja[0]['name']\n",
121
  " else:\n",
 
127
  " })\n",
128
  " middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
129
  "\n",
130
+ "# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
131
  "middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
132
+ "# For cards with multiple occurrences, delete all rows except for the top one\n",
133
  "middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
 
134
  "print(middleschool_df.shape[0], 'cards found')\n",
135
+ "print('These are the first and last 5 cards')\n",
136
  "print(middleschool_df.head())\n",
137
  "print(middleschool_df.tail())\n"
138
  ]
139
  },
140
  {
141
+ "attachments": {},
142
  "cell_type": "markdown",
143
  "metadata": {},
144
  "source": [
145
+ "Find Japanese names for cards that were not released in Japanese in Middle School legal sets"
146
  ]
147
  },
148
  {
 
170
  "def find_japanese_name(name):\n",
171
  " url = 'http://whisper.wisdom-guild.net/search.php?q=' + name\n",
172
  " r = session.get(url)\n",
173
+ " # Get the text of the <title> element in the HTML document\n",
174
  " title = r.html.find('title')[0].text\n",
175
+ " # Find the position of the English card name within the title\n",
176
  " idx = title.find(name)\n",
177
+ " # The Japanese name should come before the English name,\n",
178
+ " # so if idx is 0, there is no Japanese name\n",
179
  " if idx == 0:\n",
180
  " return None\n",
181
  " else:\n",
182
+ " # Take only the Japanese name from the title\n",
183
  " name_ja = title[0:idx - 1]\n",
184
  " return name_ja\n",
185
  "\n",
 
198
  "print(name, '→', find_japanese_name(name))\n"
199
  ]
200
  },
201
+ {
202
+ "attachments": {},
203
+ "cell_type": "markdown",
204
+ "metadata": {},
205
+ "source": [
206
+ "Exclude all cards banned in Middle School"
207
+ ]
208
+ },
209
  {
210
  "cell_type": "code",
211
  "execution_count": 19,
 
249
  " \"Yawgmoth's Bargain\",\n",
250
  " \"Yawgmoth's Will\"]\n",
251
  "print('Cards legal by set:', middleschool_df.shape[0])\n",
252
+ "# Find the rows with the banned cards\n",
253
  "banned_df = middleschool_df[pd.DataFrame(\n",
254
  " middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
255
  "print('Banned cards:', banned_df.shape[0])\n",
256
+ "# Append the banned cards to the main Middle School DataFrame,\n",
257
+ "# then remove any rows that appear twice,\n",
258
+ "# effectively leaving only the legal cards\n",
259
  "middleschool_df = pd.concat(\n",
260
  " [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
261
  "print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",