Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*- | |
# Use ReportLab package to create PDF poster | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.lib.pagesizes import A4 | |
from reportlab.lib.styles import getSampleStyleSheet | |
from reportlab.lib.units import cm | |
from reportlab.platypus import ( | |
SimpleDocTemplate, | |
Paragraph, | |
Spacer, | |
Table, | |
TableStyle, | |
Image, | |
Flowable, | |
ListFlowable, | |
ListItem, | |
) | |
from reportlab.lib import colors | |
from reportlab.pdfbase.cidfonts import UnicodeCIDFont | |
import yt_dlp | |
import cv2 | |
from PIL import Image as PILImage | |
import os | |
import tempfile | |
import re | |
import uuid | |
import pymupdf | |
''' | |
# UnicodeCIDfont names | |
$chs$ = Chinese Simplified (mainland): '$STSong-Light$' | |
$cht$ = Chinese Traditional (Taiwan): '$MSung-Light$', '$MHei-Medium$' | |
$kor$ = Korean: '$HYSMyeongJoStd-Medium$','$HYGothic-Medium$' | |
$jpn$ = Japanese: '$HeiseiMin-W3$', '$HeiseiKakuGo-W5$' | |
''' | |
# ---------------------------------------------------------------------------
# Module configuration
# ---------------------------------------------------------------------------
# CID fonts must be registered before any style can refer to them by name.
pdfmetrics.registerFont(UnicodeCIDFont('STSong-Light'))
pdfmetrics.registerFont(UnicodeCIDFont('MSung-Light'))

# Page geometry.
PAGE_SIZE = A4
MARGIN = 1.0 * cm
COLUMNS = 3  # number of image columns in the poster table

# Shared paragraph styles taken from ReportLab's sample stylesheet.
STYLE = getSampleStyleSheet()
style_title = STYLE['Title']
style_title.alignment = 1  # 1 == centered
style_body = STYLE["BodyText"]

# Width left for tables once both side margins are removed, and the
# per-column / per-image dimensions derived from it.
page_width = PAGE_SIZE[0] - 2*MARGIN
col_width = page_width / COLUMNS
img_height = 5*cm
img_width = col_width - 1*cm  # keep some padding inside each column
#========================================================================== | |
# A literal "0." that is not part of a larger number ("10.", "0.5", "2.0.").
_TITLE_MARKER = re.compile(r'(?<![\d.])0\.(?!\d)')
# A digit followed by a literal dot marks the start of a numbered section.
_SECTION_MARKER = re.compile(r'\d\.')


def _parse_summary(summary):
    """Split a numbered summary into its title and an ordered list of entries.

    Returns a ``(title_text, entries)`` pair where ``entries`` is a list of
    ``(kind, text)`` tuples and ``kind`` is ``"question"`` or ``"answer"``.
    Lines that appear before the first numbered section (and are not the
    title line) are dropped.
    """
    title_text = "Summary"  # used when no "0." line is present
    entries = []
    seen_question = False
    for line in (summary or "").strip().split("\n"):
        clean_line = line.replace("*", "")  # drop markdown emphasis markers
        title_match = _TITLE_MARKER.search(clean_line)
        if title_match:
            # Keep everything after the "0." marker, then after the first
            # colon if there is one ("0. Title: Foo" -> "Foo").
            title_text = clean_line[title_match.end():]
            _, colon, after_colon = title_text.partition(":")
            if colon:
                title_text = after_colon
            title_text = title_text.strip()
        elif _SECTION_MARKER.search(clean_line):
            # Split on the FIRST colon only so answers containing colons
            # (e.g. "10:30") are not truncated.
            question, colon, answer = clean_line.partition(":")
            entries.append(("question", question))
            if colon:  # answer given on the same line as the question
                entries.append(("answer", answer))
            seen_question = True
        elif line.strip() and seen_question:
            entries.append(("answer", line))
    return title_text, entries


def create_poster(filename, images, lang, summary, url = None):
    """Build a PDF poster with a title, a table of images, a summary and a link.

    Args:
        filename: Output path passed to ``SimpleDocTemplate``.
        images: Iterable of frames. Each one is written to a temporary JPEG
            with ``cv2.imwrite`` (presumably OpenCV image arrays -- confirm
            against the caller). ``None`` is treated as "no images".
        lang: Output language name. ``'chinese'`` (case-insensitive) selects
            the ``STSong-Light`` font; anything else uses Helvetica.
        summary: Newline-separated text. A line containing ``0.`` supplies
            the title; lines containing a digit followed by a dot start a
            new section; other non-blank lines are appended as answers.
        url: Optional link rendered as "View Original Video".

    Returns:
        ``filename``, unchanged.

    Note:
        The module-level ``style_body`` / ``style_title`` objects have their
        ``fontName`` reassigned on every call.
    """
    # Paragraph text is parsed as XML-like markup, so free text must be escaped.
    from xml.sax.saxutils import escape

    print("Output language is:", lang)
    doc = SimpleDocTemplate(filename, pagesize=PAGE_SIZE,
                            leftMargin=MARGIN, rightMargin=MARGIN,
                            topMargin=MARGIN, bottomMargin=MARGIN)

    # Pick fonts for the requested output language.
    if (lang or "").lower() == 'chinese':
        style_body.fontName = 'STSong-Light'
        style_title.fontName = 'STSong-Light'
    else:
        style_body.fontName = 'Helvetica'
        style_title.fontName = 'Helvetica-Bold'

    # Turn the parsed summary into flowables.
    title_text, entries = _parse_summary(summary)
    title = Paragraph(f"<b>{escape(title_text)}</b>", style_title)
    list_content = []
    for kind, text in entries:
        if kind == "question":
            list_content.append(Spacer(1, 0.3*cm))  # gap before each section
        list_content.append(Paragraph(f"<b>{escape(text)}</b>", style_body))
    output_list = ListFlowable(list_content,
                               bulletType='bullet',
                               bulletColor='white', value='circle')

    if images is None:
        images = []

    # The temporary JPEGs are read when the document is built, so the build
    # has to happen while the temporary directory still exists.
    with tempfile.TemporaryDirectory() as temp_dir:
        cells = []
        for idx, image in enumerate(images):
            image_path = os.path.join(temp_dir, f'{idx}.jpg')
            # ReportLab's Image flowable is given a file path, so each frame
            # is written to disk first.
            cv2.imwrite(image_path, image)
            cells.append([
                Spacer(1, 0.3*cm),
                Image(image_path, width=img_width, height=img_height),
                Spacer(1, 0.3*cm),
            ])

        story = [title]
        if cells:
            # Wrap the cells into rows of at most COLUMNS entries; with fewer
            # images than COLUMNS the table simply has fewer columns.
            ncols = min(COLUMNS, len(cells))
            table_data = []
            for start in range(0, len(cells), ncols):
                row = cells[start:start + ncols]
                row.extend([''] * (ncols - len(row)))  # pad a short last row
                table_data.append(row)
            tbl = Table(table_data,
                        colWidths=[col_width]*ncols,
                        rowHeights=img_height+0.5*cm)
            tbl.setStyle(TableStyle([
                ('ALIGN', (0,0), (-1,-1), 'CENTER'),
                ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
                # NOTE(review): 'PADDING' is not among the documented cell
                # commands (LEFTPADDING, RIGHTPADDING, ...); confirm whether
                # it has any effect.
                ('PADDING', (0,0), (-1,-1), 10),
                ('BOX', (0,0), (-1,-1), 0.5, colors.white),
                ('INNERGRID', (0,0), (-1,-1), 0.5, colors.white),
            ]))
            story.append(tbl)

        story.append(Spacer(1,0.3*cm))
        story.append(output_list)
        story.append(Spacer(1,0.5*cm))

        # Clickable video link
        if url:
            safe_url = escape(str(url), {'"': '&quot;'})
            link_text = (f'<link href="{safe_url}"><font color="blue">'
                         '<u>View Original Video</u></font></link>')
            story.append(Paragraph(link_text, style_body))

        doc.build(story)
    return filename
def generate_unique_filename(extension):
    """Return a random UUID4 string with ``extension`` appended to it."""
    random_stem = uuid.uuid4()
    return "{}{}".format(random_stem, extension)
def generate_tmp_filename(basename, extension):
    """Return ``basename`` immediately followed by ``extension``."""
    return "{}{}".format(basename, extension)
def pdf_to_jpg(pdf_path, output_path):
    """Render the first page of a PDF to a JPEG file and open it with PIL.

    Args:
        pdf_path: Path of the PDF to open with ``pymupdf``.
        output_path: Path the JPEG is written to.

    Returns:
        ``(image, output_path)`` where ``image`` is the result of
        ``PILImage.open(output_path)``.
    """
    doc = pymupdf.open(pdf_path)
    try:
        # Only page index 0 is rendered.
        pix = doc.load_page(0).get_pixmap()
        pix.save(output_path, "JPEG")
    finally:
        # Release the document even when loading or rendering fails.
        doc.close()
    image = PILImage.open(output_path)
    return image, output_path
def download_youtube_video(url):
    """Download a video with yt-dlp and return the path it was saved to.

    The output name is the last path component of ``url`` with ``.mp4``
    appended (built by ``generate_tmp_filename``).

    Args:
        url: Address of the video to download.

    Returns:
        The output path on success. On any failure the error is printed and
        its message text is returned instead, so callers need to check that
        the returned value is an existing file.
    """
    basename = os.path.basename(url)
    output_path = generate_tmp_filename(basename, ".mp4")
    ydl_opts = {
        # 'outtmpl' is an output *template* in which '%' introduces a field;
        # double it so a '%' coming from the URL is kept as a literal.
        'outtmpl': output_path.replace('%', '%%'),
        'format': 'best',  # download the best quality available
        'cookiefile': 'cookies.txt',  # path to the cookies file (JW 20250115)
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        print("load yt_dlp:", e)
        return str(e)
    return output_path