Spaces:

yoon-gu
/

pokemon

Sleeping

App Files Files Community

Yoon-gu Hwang committed on Sep 30, 2023

Commit

6cd90ae

•

1 Parent(s): 2562105

upload files

Browse files

Files changed (4) hide show

app.py +105 -0
make_dataset.py +66 -0
pokemon.json +0 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import gradio as gr
+import json
+import os
+import docx
+from docx.oxml.ns import qn
+from docx import Document
+from docx.shared import Inches, Pt, Cm, Mm, RGBColor
+from docx.enum.table import WD_TABLE_ALIGNMENT
+from docx2pdf import convert
+import pandas as pd
+with open('pokemon.json', 'r') as f:
+    pokemons = json.load(f)
+GEN_RANGE = {
+    "1세대": [1, 151],
+    "2세대": [152, 251],
+    "3세대": [252, 386],
+    "4세대": [387, 493],
+    "5세대": [494, 649],
+    "6세대": [650, 721],
+    "7세대": [722, 809],
+    "8세대": [810, 905],
+    "9세대": [906, 1017]
+}
+generation = gr.Dropdown(
+            [f"{k}세대" for k in range(1, 10)], value="1세대", label="포켓몬 세대", info="원하는 포켓몬 세대를 선택하세요."
+        )
+download = gr.File(label="Download a file")
+text = gr.DataFrame()
+def write_docx(gen):
+    filename = f'포켓몬{gen}.docx'
+    document = Document()
+    section = document.sections[0]
+    section.page_height = Mm(297)
+    section.page_width = Mm(210)
+    #changing the page margins
+    margin = 1.27
+    sections = document.sections
+    for section in sections:
+        section.top_margin = Cm(margin)
+        section.bottom_margin = Cm(margin)
+        section.left_margin = Cm(margin)
+        section.right_margin = Cm(margin)
+    document.styles['Normal'].font.name = 'NanumSquareRound'
+    document.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), 'NanumSquareRound')
+    data_dict = []
+    start, end = GEN_RANGE[gen]
+    for k in range(start, end+1):
+        name = pokemons[k-1]['name']
+        number = pokemons[k-1]['number']
+        types = pokemons[k-1]['types']
+        image_path = pokemons[k-1]['image_path']
+        data_dict.append(
+            dict(이름=name, No=number, 타입='+'.join(types))
+        )
+        df = pd.DataFrame(data_dict)
+        # Document
+        table = document.add_table(rows=4, cols=1)
+        table.alignment = WD_TABLE_ALIGNMENT.CENTER
+        table.style = 'Table Grid'
+        hdr_cells = table.rows[0].cells
+        hdr_cells[0].text = f"{number}"
+        hdr_cells[0].paragraphs[0].runs[0].font.size = Pt(50)
+        hdr_cells[0].paragraphs[0].alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
+        hdr_cells = table.rows[1].cells
+        p = hdr_cells[0].add_paragraph()
+        p.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
+        r = p.add_run()
+        r.add_picture(image_path, width=Cm(14.5), height=Cm(14.5))
+        r.add_break(docx.enum.text.WD_BREAK.LINE)
+        hdr_cells = table.rows[3].cells
+        hdr_cells[0].text = f"{name}"
+        hdr_cells[0].paragraphs[0].runs[0].font.size = Pt(70)
+        hdr_cells[0].paragraphs[0].runs[0].font.color.rgb = RGBColor(192, 192, 192)
+        hdr_cells[0].paragraphs[0].alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
+        hdr_cells = table.rows[2].cells
+        hdr_cells[0].text = f"{'+'.join(types)}"
+        hdr_cells[0].paragraphs[0].runs[0].font.size = Pt(70)
+        hdr_cells[0].paragraphs[0].runs[0].font.color.rgb = RGBColor(192, 192, 192)
+        hdr_cells[0].paragraphs[0].alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.CENTER
+        document.add_page_break()
+        yield df[['No', '이름', '타입']], filename.replace('docx', 'pdf')
+    if filename not in os.listdir():
+        document.save(filename)
+        convert(filename)
+    return df, filename.replace('docx', 'pdf')
+demo = gr.Interface(write_docx, generation, [text, download], title="대치동 포켓몬 도감 생성기",
+                    description="원하는 포켓몬 세대를 선택하고, 다운로드를 눌러주세요.")
+demo.queue(concurrency_count=3)
+demo.launch()

make_dataset.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import urllib.request
+import json
+import urllib.parse
+from urllib.parse import urlsplit, quote
+from urllib.request import Request, urlopen
+from bs4 import BeautifulSoup
+from tqdm import tqdm
+import pandas as pd
+# url = 'https://pokemon.fandom.com/ko/wiki/흥나숭_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/나몰빼미_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/도치마론_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/비크티니_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/모부기_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/나무지기_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/치코리타_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/토게틱_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/포푸니_(포켓몬)'
+url = 'https://pokemon.fandom.com/ko/wiki/이상해씨_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/레트라_(포켓몬)'
+# url = 'https://pokemon.fandom.com/ko/wiki/신비록_(포켓몬)'
+url_info = urlsplit(url)
+encoded_url = f'{url_info.scheme}://{url_info.netloc}{quote(url_info.path)}'
+info = []
+erros = []
+target_number = 1017
+cnt = 0
+for _ in tqdm(range(target_number+2)):
+    cnt += 1
+    req = Request(encoded_url, headers={'User-Agent': 'Mozilla/5.0'})
+    res = urlopen(req)
+    html = res.read()
+    soup = BeautifulSoup(html, 'html.parser')
+    name = soup.find("div", {"class": "name-ko"}).text.strip()
+    number = soup.find("div", {"class": "index"}).text.strip()
+    try:
+        img_url = soup.find("div", {"class":"image rounded"}).find("img")['data-src']
+        filepath = f"images/{number.replace('.', '_')}_{name}.png"
+        urllib.request.urlretrieve(img_url, filepath)
+    except:
+        filepath = None
+    doc_text = '\n'.join([p.text.replace('\n', '').strip() for p in soup.find_all("p")])
+    types = [poke_type['title'].split(' ')[0].strip() for poke_type in soup.select('tbody > tr > td > div')[0].select('span > a')]
+    info.append(dict(
+        name=name,
+        number=number,
+        types=types,
+        doc_text=doc_text,
+        image_path=filepath,
+        url=encoded_url
+    ))
+    next_monster = soup.find("table").findAll("a")[-1]['href']
+    encoded_url = "https://pokemon.fandom.com" + next_monster
+    if number == f"No.{target_number:04d}":
+        break
+    if cnt >= target_number:
+        break
+pd.DataFrame(info).to_csv('pokemon.csv', index=False)
+with open('pokemon.json', 'w') as f:
+    json.dump(info, f, ensure_ascii=False, indent=4)

pokemon.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+python-docx
+pandas