Spaces:
Running
Running
whispersound
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ from fpdf import FPDF
|
|
8 |
from datetime import datetime
|
9 |
from zoneinfo import ZoneInfo
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
11 |
|
12 |
# OpenAI API 클라이언트 설정
|
13 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
@@ -515,8 +516,13 @@ def save_to_pdf(blog_post, user_topic):
|
|
515 |
current_tag = ''
|
516 |
buffer = ''
|
517 |
is_bold = False
|
|
|
|
|
|
|
|
|
518 |
|
519 |
for part in re.split(tag_pattern, blog_post):
|
|
|
520 |
if part in ['h1', 'h2', 'h3', 'p', 'strong', 'li', 'br']:
|
521 |
if buffer:
|
522 |
if current_tag in ['h1', 'h2', 'h3']:
|
@@ -524,30 +530,49 @@ def save_to_pdf(blog_post, user_topic):
|
|
524 |
pdf.multi_cell(0, 10, buffer.strip(), align='L')
|
525 |
pdf.ln(5)
|
526 |
elif current_tag == 'p':
|
527 |
-
pdf.set_font("NanumGothic", '',
|
|
|
528 |
pdf.multi_cell(0, 6, buffer.strip(), align='J')
|
529 |
pdf.ln(5)
|
530 |
elif current_tag == 'li':
|
531 |
-
pdf.set_font("NanumGothic", '',
|
|
|
532 |
pdf.multi_cell(0, 6, "• " + buffer.strip(), align='J')
|
533 |
elif current_tag == 'br':
|
534 |
pdf.ln(5)
|
535 |
buffer = ''
|
536 |
current_tag = part
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
537 |
elif part == 'strong':
|
538 |
is_bold = True
|
539 |
-
pdf.set_font("NanumGothic", 'B',
|
540 |
elif part == '/strong':
|
541 |
is_bold = False
|
542 |
-
pdf.set_font("NanumGothic", '',
|
543 |
elif part.startswith('/') or part == 'div':
|
544 |
continue
|
545 |
elif not tag_pattern.match(part) and part.strip():
|
546 |
-
|
547 |
-
|
548 |
# 마지막 버퍼 처리
|
549 |
if buffer:
|
550 |
-
pdf.set_font("NanumGothic", '',
|
|
|
551 |
pdf.multi_cell(0, 6, buffer.strip(), align='J')
|
552 |
|
553 |
# PDF 저장
|
|
|
8 |
from datetime import datetime
|
9 |
from zoneinfo import ZoneInfo
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
11 |
+
from weasyprint import HTML
|
12 |
|
13 |
# OpenAI API 클라이언트 설정
|
14 |
openai.api_key = os.getenv("OPENAI_API_KEY")
|
|
|
516 |
current_tag = ''
|
517 |
buffer = ''
|
518 |
is_bold = False
|
519 |
+
|
520 |
+
# 스타일 속성 초기화
|
521 |
+
font_size = 11
|
522 |
+
color = (0, 0, 0) # 기본 글자 색 (검정)
|
523 |
|
524 |
for part in re.split(tag_pattern, blog_post):
|
525 |
+
# 태그 시작 부분
|
526 |
if part in ['h1', 'h2', 'h3', 'p', 'strong', 'li', 'br']:
|
527 |
if buffer:
|
528 |
if current_tag in ['h1', 'h2', 'h3']:
|
|
|
530 |
pdf.multi_cell(0, 10, buffer.strip(), align='L')
|
531 |
pdf.ln(5)
|
532 |
elif current_tag == 'p':
|
533 |
+
pdf.set_font("NanumGothic", '', font_size)
|
534 |
+
pdf.set_text_color(*color) # 설정된 색상 적용
|
535 |
pdf.multi_cell(0, 6, buffer.strip(), align='J')
|
536 |
pdf.ln(5)
|
537 |
elif current_tag == 'li':
|
538 |
+
pdf.set_font("NanumGothic", '', font_size)
|
539 |
+
pdf.set_text_color(*color)
|
540 |
pdf.multi_cell(0, 6, "• " + buffer.strip(), align='J')
|
541 |
elif current_tag == 'br':
|
542 |
pdf.ln(5)
|
543 |
buffer = ''
|
544 |
current_tag = part
|
545 |
+
|
546 |
+
# 스타일 속성 반영
|
547 |
+
elif 'style=' in part:
|
548 |
+
styles = re.findall(r'style="(.*?)"', part)
|
549 |
+
for style in styles:
|
550 |
+
if 'color:' in style:
|
551 |
+
color_match = re.search(r'color: (#\w+);', style)
|
552 |
+
if color_match:
|
553 |
+
color_hex = color_match.group(1)
|
554 |
+
r, g, b = tuple(int(color_hex[i:i+2], 16) for i in (1, 3, 5))
|
555 |
+
color = (r, g, b) # RGB 값으로 변환하여 적용
|
556 |
+
if 'font-size:' in style:
|
557 |
+
size_match = re.search(r'font-size: (\d+)px;', style)
|
558 |
+
if size_match:
|
559 |
+
font_size = int(size_match.group(1)) * 0.75 # pt 단위로 변환 (1px = 0.75pt)
|
560 |
+
|
561 |
elif part == 'strong':
|
562 |
is_bold = True
|
563 |
+
pdf.set_font("NanumGothic", 'B', font_size)
|
564 |
elif part == '/strong':
|
565 |
is_bold = False
|
566 |
+
pdf.set_font("NanumGothic", '', font_size)
|
567 |
elif part.startswith('/') or part == 'div':
|
568 |
continue
|
569 |
elif not tag_pattern.match(part) and part.strip():
|
570 |
+
buffer += part.strip() + ' '
|
571 |
+
|
572 |
# 마지막 버퍼 처리
|
573 |
if buffer:
|
574 |
+
pdf.set_font("NanumGothic", '', font_size)
|
575 |
+
pdf.set_text_color(*color)
|
576 |
pdf.multi_cell(0, 6, buffer.strip(), align='J')
|
577 |
|
578 |
# PDF 저장
|