Yoon-gu Hwang committed on
Commit
6cd90ae
1 Parent(s): 2562105

upload files

Browse files
Files changed (4) hide show
  1. app.py +105 -0
  2. make_dataset.py +66 -0
  3. pokemon.json +0 -0
  4. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import json
3
+ import os
4
+ import docx
5
+ from docx.oxml.ns import qn
6
+ from docx import Document
7
+ from docx.shared import Inches, Pt, Cm, Mm, RGBColor
8
+ from docx.enum.table import WD_TABLE_ALIGNMENT
9
+ from docx2pdf import convert
10
+ import pandas as pd
11
+
12
# Pokémon records. write_docx() indexes this list as pokemons[number - 1], so
# it is assumed to be ordered by Pokédex number starting at 1 -- TODO confirm
# against the generated pokemon.json.
# The file contains non-ASCII (Korean) text, so the encoding is stated
# explicitly instead of relying on the platform default.
with open('pokemon.json', 'r', encoding='utf-8') as f:
    pokemons = json.load(f)

# Generation label ("N세대") -> inclusive [first, last] Pokémon number.
GEN_RANGE = {
    "1세대": [1, 151],
    "2세대": [152, 251],
    "3세대": [252, 386],
    "4세대": [387, 493],
    "5세대": [494, 649],
    "6세대": [650, 721],
    "7세대": [722, 809],
    "8세대": [810, 905],
    "9세대": [906, 1017],
}

# The choices come straight from GEN_RANGE so that every selectable value is
# guaranteed to be a valid key when write_docx() looks it up.
generation = gr.Dropdown(
    list(GEN_RANGE),
    value="1세대",
    label="포켓몬 세대",
    info="원하는 포켓몬 세대를 선택하세요.",
)

# Output components: the generated file and the table of included Pokémon.
download = gr.File(label="Download a file")
text = gr.DataFrame()
33
+
34
def _new_document():
    """Return an empty A4 document with 1.27 cm margins and the NanumSquareRound font."""
    document = Document()
    first_section = document.sections[0]
    first_section.page_height = Mm(297)
    first_section.page_width = Mm(210)

    margin = Cm(1.27)
    for section in document.sections:
        section.top_margin = margin
        section.bottom_margin = margin
        section.left_margin = margin
        section.right_margin = margin

    normal = document.styles['Normal']
    normal.font.name = 'NanumSquareRound'
    # The East Asian font lives in a separate rFonts attribute, so it is set
    # directly on the style's XML element.
    normal._element.rPr.rFonts.set(qn('w:eastAsia'), 'NanumSquareRound')
    return document


def _add_pokemon_page(document, number, name, types, image_path):
    """Append one page: a 4-row table (number, picture, types, name) and a page break."""
    center = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
    gray = RGBColor(192, 192, 192)

    table = document.add_table(rows=4, cols=1)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.style = 'Table Grid'

    def fill(row_index, value, size, color=None):
        # Write centred text of the given point size into the row's only cell.
        cell = table.rows[row_index].cells[0]
        cell.text = value
        paragraph = cell.paragraphs[0]
        font = paragraph.runs[0].font
        font.size = Pt(size)
        if color is not None:
            font.color.rgb = color
        paragraph.alignment = center

    fill(0, f"{number}", 50)

    picture_paragraph = table.rows[1].cells[0].add_paragraph()
    picture_paragraph.alignment = center
    picture_run = picture_paragraph.add_run()
    # make_dataset.py stores image_path=None when the download failed; leave
    # the picture cell empty instead of crashing the whole export.
    if image_path and os.path.exists(image_path):
        picture_run.add_picture(image_path, width=Cm(14.5), height=Cm(14.5))
    picture_run.add_break(docx.enum.text.WD_BREAK.LINE)

    fill(2, '+'.join(types), 70, gray)
    fill(3, f"{name}", 70, gray)

    document.add_page_break()


def write_docx(gen):
    """Build the picture book for one generation and stream progress to the UI.

    This is a generator: every yielded value is a ``(table, file)`` pair for
    the DataFrame and File output components.

    Args:
        gen: A key of ``GEN_RANGE`` (for example ``"1세대"``).

    Yields:
        ``(DataFrame, None)`` after each Pokémon page is added, then
        ``(DataFrame, pdf_path)`` once the PDF exists on disk.  The path is
        only yielded after the file has been written, and the final result is
        yielded rather than returned because a generator's ``return`` value is
        not seen by code that iterates it.

    Raises:
        KeyError: If ``gen`` is not a key of ``GEN_RANGE``.
    """
    filename = f'포켓몬{gen}.docx'
    pdf_path = filename.replace('docx', 'pdf')
    columns = ['No', '이름', '타입']

    start, end = GEN_RANGE[gen]
    # Slicing (rather than indexing k-1) tolerates a dataset that is shorter
    # than the configured range.
    selected = pokemons[start - 1:end]

    def to_row(pokemon):
        return {
            '이름': pokemon['name'],
            'No': pokemon['number'],
            '타입': '+'.join(pokemon['types']),
        }

    # A PDF from an earlier request is reused as-is; checking the PDF (not the
    # .docx) means a failed conversion is retried on the next request.
    if os.path.exists(pdf_path):
        rows = [to_row(pokemon) for pokemon in selected]
        yield pd.DataFrame(rows, columns=columns), pdf_path
        return

    document = _new_document()
    rows = []
    for pokemon in selected:
        rows.append(to_row(pokemon))
        _add_pokemon_page(
            document,
            pokemon['number'],
            pokemon['name'],
            pokemon['types'],
            pokemon['image_path'],
        )
        # Progress update: table so far, no file yet.
        yield pd.DataFrame(rows, columns=columns), None

    document.save(filename)
    # docx2pdf writes the PDF next to the .docx when given only an input path.
    convert(filename)
    yield pd.DataFrame(rows, columns=columns), pdf_path
101
+
102
# One dropdown in; a table and a downloadable file out.
demo = gr.Interface(
    fn=write_docx,
    inputs=generation,
    outputs=[text, download],
    title="대치동 포켓몬 도감 생성기",
    description="원하는 포켓몬 세대를 선택하고, 다운로드를 눌러주세요.",
)
# write_docx is a generator, so requests go through the queue.
demo.queue(concurrency_count=3)
demo.launch()
make_dataset.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import urllib.parse
import urllib.request
from urllib.parse import quote, urlsplit
from urllib.request import Request, urlopen

import pandas as pd
from bs4 import BeautifulSoup
from tqdm import tqdm
9
+
10
# Start page of the crawl.  Alternative start pages are kept commented out.
# url = 'https://pokemon.fandom.com/ko/wiki/흥나숭_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/나몰빼미_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/도치마론_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/비크티니_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/모부기_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/나무지기_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/치코리타_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/토게틱_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/포푸니_(포켓몬)'
url = 'https://pokemon.fandom.com/ko/wiki/이상해씨_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/레트라_(포켓몬)'
# url = 'https://pokemon.fandom.com/ko/wiki/신비록_(포켓몬)'

url_info = urlsplit(url)
# The path contains non-ASCII characters, so it is percent-encoded before
# being handed to urllib.
encoded_url = f'{url_info.scheme}://{url_info.netloc}{quote(url_info.path)}'

info = []
errors = []  # (page url, exception) for every image that could not be saved
target_number = 1017

# urlretrieve does not create missing directories; without this every image
# download would fail and be recorded as image_path=None.
os.makedirs('images', exist_ok=True)

# Visit at most `target_number` pages, following each page's link to the next.
for _ in tqdm(range(target_number)):
    req = Request(encoded_url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req) as res:
        html = res.read()
    soup = BeautifulSoup(html, 'html.parser')

    name = soup.find("div", {"class": "name-ko"}).text.strip()
    number = soup.find("div", {"class": "index"}).text.strip()

    # The image is best-effort: a page without the expected markup or a failed
    # download leaves image_path as None, but the failure is recorded instead
    # of being silently swallowed.
    try:
        img_url = soup.find("div", {"class": "image rounded"}).find("img")['data-src']
        filepath = f"images/{number.replace('.', '_')}_{name}.png"
        urllib.request.urlretrieve(img_url, filepath)
    except (AttributeError, KeyError, TypeError, ValueError, OSError) as err:
        errors.append((encoded_url, err))
        filepath = None

    doc_text = '\n'.join([p.text.replace('\n', '').strip() for p in soup.find_all("p")])
    # Type names are the first word of each link title in the first matching div.
    types = [
        poke_type['title'].split(' ')[0].strip()
        for poke_type in soup.select('tbody > tr > td > div')[0].select('span > a')
    ]

    info.append(dict(
        name=name,
        number=number,
        types=types,
        doc_text=doc_text,
        image_path=filepath,
        url=encoded_url,
    ))

    # The last link of the first <table> is used as the next page to visit
    # (presumably the page's previous/next navigation -- verify against the site).
    next_monster = soup.find("table").find_all("a")[-1]['href']
    encoded_url = "https://pokemon.fandom.com" + next_monster

    if number == f"No.{target_number:04d}":
        break

pd.DataFrame(info).to_csv('pokemon.csv', index=False)
# ensure_ascii=False writes raw non-ASCII characters, so the file encoding is
# fixed to UTF-8 rather than left to the platform default.
with open('pokemon.json', 'w', encoding='utf-8') as f:
    json.dump(info, f, ensure_ascii=False, indent=4)

if errors:
    print(f"{len(errors)} image(s) could not be downloaded:")
    for failed_url, err in errors:
        print(f"  {failed_url}: {err!r}")
pokemon.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ python-docx
3
+ pandas