slide-deck-ai / helpers /pptx_helper.py
barunsaha's picture
Probabilistically add background & foreground images to some of the slides
437156d
raw
history blame
28.1 kB
"""
A set of functions to create a PowerPoint slide deck.
"""
import logging
import pathlib
import random
import re
import sys
import tempfile
from typing import List, Tuple, Optional
import json5
import pptx
from dotenv import load_dotenv
from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE
from pptx.shapes.placeholder import PicturePlaceholder, SlidePlaceholder
sys.path.append('..')
sys.path.append('../..')
import helpers.image_search as ims
from global_config import GlobalConfig
load_dotenv()
# English Metric Unit (used by PowerPoint) to inches
EMU_TO_INCH_SCALING_FACTOR = 1.0 / 914400
INCHES_1_5 = pptx.util.Inches(1.5)
INCHES_1 = pptx.util.Inches(1)
INCHES_0_5 = pptx.util.Inches(0.5)
INCHES_0_4 = pptx.util.Inches(0.4)
INCHES_0_3 = pptx.util.Inches(0.3)
STEP_BY_STEP_PROCESS_MARKER = '>> '
IMAGE_DISPLAY_PROBABILITY = 0.3
FOREGROUND_IMAGE_PROBABILITY = 0.75
SLIDE_NUMBER_REGEX = re.compile(r"^slide[ ]+\d+:", re.IGNORECASE)
logger = logging.getLogger(__name__)
def remove_slide_number_from_heading(header: str) -> str:
"""
Remove the slide number from a given slide header.
:param header: The header of a slide.
:return: The header without slide number.
"""
if SLIDE_NUMBER_REGEX.match(header):
idx = header.find(':')
header = header[idx + 1:]
return header
def generate_powerpoint_presentation(
structured_data: str,
slides_template: str,
output_file_path: pathlib.Path
) -> List:
"""
Create and save a PowerPoint presentation file containing the content in JSON format.
:param structured_data: The presentation contents as "JSON" (may contain trailing commas).
:param slides_template: The PPTX template to use.
:param output_file_path: The path of the PPTX file to save as.
:return: A list of presentation title and slides headers.
"""
# The structured "JSON" might contain trailing commas, so using json5
parsed_data = json5.loads(structured_data)
logger.debug(
'*** Using PPTX template: %s',
GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
)
presentation = pptx.Presentation(GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file'])
slide_width_inch, slide_height_inch = _get_slide_width_height_inches(presentation)
# The title slide
title_slide_layout = presentation.slide_layouts[0]
slide = presentation.slides.add_slide(title_slide_layout)
title = slide.shapes.title
subtitle = slide.placeholders[1]
title.text = parsed_data['title']
logger.info(
'PPT title: %s | #slides: %d',
title.text, len(parsed_data['slides'])
)
subtitle.text = 'by Myself and SlideDeck AI :)'
all_headers = [title.text, ]
# Add content in a loop
for a_slide in parsed_data['slides']:
is_processing_done = _handle_double_col_layout(
presentation=presentation,
slide_json=a_slide,
slide_width_inch=slide_width_inch,
slide_height_inch=slide_height_inch
)
if not is_processing_done:
is_processing_done = _handle_step_by_step_process(
presentation=presentation,
slide_json=a_slide,
slide_width_inch=slide_width_inch,
slide_height_inch=slide_height_inch
)
if not is_processing_done:
_handle_default_display(
presentation=presentation,
slide_json=a_slide,
slide_width_inch=slide_width_inch,
slide_height_inch=slide_height_inch
)
# The thank-you slide
last_slide_layout = presentation.slide_layouts[0]
slide = presentation.slides.add_slide(last_slide_layout)
title = slide.shapes.title
title.text = 'Thank you!'
presentation.save(output_file_path)
return all_headers
def get_flat_list_of_contents(items: list, level: int) -> List[Tuple]:
"""
Flatten a (hierarchical) list of bullet points to a single list containing each item and
its level.
:param items: A bullet point (string or list).
:param level: The current level of hierarchy.
:return: A list of (bullet item text, hierarchical level) tuples.
"""
flat_list = []
for item in items:
if isinstance(item, str):
flat_list.append((item, level))
elif isinstance(item, list):
flat_list = flat_list + get_flat_list_of_contents(item, level + 1)
return flat_list
def _handle_default_display(
presentation: pptx.Presentation,
slide_json: dict,
slide_width_inch: float,
slide_height_inch: float
):
"""
Display a list of text in a slide.
:param presentation: The presentation object.
:param slide_json: The content of the slide as JSON data.
:param slide_width_inch: The width of the slide in inches.
:param slide_height_inch: The height of the slide in inches.
"""
status = False
if random.random() < IMAGE_DISPLAY_PROBABILITY:
if random.random() < FOREGROUND_IMAGE_PROBABILITY:
status = _handle_display_image__in_foreground(
presentation,
slide_json,
slide_width_inch,
slide_height_inch
)
else:
status = _handle_display_image__in_background(
presentation,
slide_json,
slide_width_inch,
slide_height_inch
)
if status:
return
# Image display failed, so display only text
bullet_slide_layout = presentation.slide_layouts[1]
slide = presentation.slides.add_slide(bullet_slide_layout)
shapes = slide.shapes
title_shape = shapes.title
body_shape = shapes.placeholders[1]
title_shape.text = remove_slide_number_from_heading(slide_json['heading'])
text_frame = body_shape.text_frame
# The bullet_points may contain a nested hierarchy of JSON arrays
# In some scenarios, it may contain objects (dictionaries) because the LLM generated so
# ^ The second scenario is not covered
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
for idx, an_item in enumerate(flat_items_list):
if idx == 0:
text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
else:
paragraph = text_frame.add_paragraph()
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
paragraph.level = an_item[1]
_handle_key_message(
the_slide=slide,
slide_json=slide_json,
slide_height_inch=slide_height_inch,
slide_width_inch=slide_width_inch
)
def _handle_display_image__in_foreground(
presentation: pptx.Presentation(),
slide_json: dict,
slide_width_inch: float,
slide_height_inch: float
) -> bool:
"""
Create a slide with text and image using a picture placeholder layout.
:param presentation: The presentation object.
:param slide_json: The content of the slide as JSON data.
:param slide_width_inch: The width of the slide in inches.
:param slide_height_inch: The height of the slide in inches.
:return: True if the side has been processed.
"""
img_keywords = slide_json['img_keywords'].strip()
slide = presentation.slide_layouts[8] # Picture with Caption
slide = presentation.slides.add_slide(slide)
title_placeholder = slide.shapes.title
title_placeholder.text = remove_slide_number_from_heading(slide_json['heading'])
pic_col: PicturePlaceholder = slide.shapes.placeholders[1]
text_col: SlidePlaceholder = slide.shapes.placeholders[2]
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
for idx, an_item in enumerate(flat_items_list):
if idx == 0:
text_col.text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
else:
paragraph = text_col.text_frame.add_paragraph()
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
paragraph.level = an_item[1]
if not img_keywords:
# No keywords, so no image search and addition
return True
try:
photo_url, page_url = ims.get_photo_url_from_api_response(
ims.search_pexels(query=img_keywords, size='medium')
)
if photo_url:
pic_col.insert_picture(
ims.get_image_from_url(photo_url)
)
_add_text_at_bottom(
slide=slide,
slide_width_inch=slide_width_inch,
slide_height_inch=slide_height_inch,
text='Photo provided by Pexels',
hyperlink=page_url
)
except Exception as ex:
logger.error(
'*** Error occurred while running adding image to slide: %s',
str(ex)
)
return True
def _handle_display_image__in_background(
presentation: pptx.Presentation(),
slide_json: dict,
slide_width_inch: float,
slide_height_inch: float
) -> bool:
"""
Add a slide with text and an image in the background. It works just like
`_handle_default_display()` but with a background image added.
:param presentation: The presentation object.
:param slide_json: The content of the slide as JSON data.
:param slide_width_inch: The width of the slide in inches.
:param slide_height_inch: The height of the slide in inches.
:return: True if the slide has been processed.
"""
img_keywords = slide_json['img_keywords'].strip()
# Add a photo in the background, text in the foreground
slide = presentation.slides.add_slide(presentation.slide_layouts[1])
title_shape = slide.shapes.title
body_shape = slide.shapes.placeholders[1]
title_shape.text = remove_slide_number_from_heading(slide_json['heading'])
flat_items_list = get_flat_list_of_contents(slide_json['bullet_points'], level=0)
for idx, an_item in enumerate(flat_items_list):
if idx == 0:
body_shape.text_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
else:
paragraph = body_shape.text_frame.add_paragraph()
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
paragraph.level = an_item[1]
if not img_keywords:
# No keywords, so no image search and addition
return True
try:
photo_url, page_url = ims.get_photo_url_from_api_response(
ims.search_pexels(query=img_keywords, size='large')
)
if photo_url:
picture = slide.shapes.add_picture(
image_file=ims.get_image_from_url(photo_url),
left=0,
top=0,
width=pptx.util.Inches(slide_width_inch),
)
_add_text_at_bottom(
slide=slide,
slide_width_inch=slide_width_inch,
slide_height_inch=slide_height_inch,
text='Photos provided by Pexels',
hyperlink=page_url
)
# Move picture to background
# https://github.com/scanny/python-pptx/issues/49#issuecomment-137172836
slide.shapes._spTree.remove(picture._element)
slide.shapes._spTree.insert(2, picture._element)
except Exception as ex:
logger.error(
'*** Error occurred while running adding image to the slide background: %s',
str(ex)
)
return True
def _add_text_at_bottom(
slide: pptx.slide.Slide,
slide_width_inch: float,
slide_height_inch: float,
text: str,
hyperlink: Optional[str] = None,
target_height: Optional[float] = 0.5
):
"""
Add arbitrary text to a textbox positioned near the lower left side of a slide.
:param slide: The slide.
:param slide_width_inch: The width of the slide.
:param slide_height_inch: The height of the slide.
:param target_height: the target height of the box in inches (optional).
:param text: The text to be added
:param hyperlink: The hyperlink to be added to the text (optional).
"""
footer = slide.shapes.add_textbox(
left=INCHES_1,
top=pptx.util.Inches(slide_height_inch - target_height),
width=pptx.util.Inches(slide_width_inch),
height=pptx.util.Inches(target_height)
)
paragraph = footer.text_frame.paragraphs[0]
run = paragraph.add_run()
run.text = text
run.font.size = pptx.util.Pt(10)
run.font.underline = False
if hyperlink:
run.hyperlink.address = hyperlink
def _handle_double_col_layout(
presentation: pptx.Presentation(),
slide_json: dict,
slide_width_inch: float,
slide_height_inch: float
) -> bool:
"""
Add a slide with a double column layout for comparison.
:param presentation: The presentation object.
:param slide_json: The content of the slide as JSON data.
:param slide_width_inch: The width of the slide in inches.
:param slide_height_inch: The height of the slide in inches.
:return: True if double col layout has been added; False otherwise.
"""
if 'bullet_points' in slide_json and slide_json['bullet_points']:
double_col_content = slide_json['bullet_points']
if double_col_content and (
len(double_col_content) == 2
) and isinstance(double_col_content[0], dict) and isinstance(double_col_content[1], dict):
slide = presentation.slide_layouts[4]
slide = presentation.slides.add_slide(slide)
shapes = slide.shapes
title_placeholder = shapes.title
title_placeholder.text = remove_slide_number_from_heading(slide_json['heading'])
left_heading, right_heading = shapes.placeholders[1], shapes.placeholders[3]
left_col, right_col = shapes.placeholders[2], shapes.placeholders[4]
left_col_frame, right_col_frame = left_col.text_frame, right_col.text_frame
if 'heading' in double_col_content[0]:
left_heading.text = double_col_content[0]['heading']
if 'bullet_points' in double_col_content[0]:
flat_items_list = get_flat_list_of_contents(
double_col_content[0]['bullet_points'], level=0
)
for idx, an_item in enumerate(flat_items_list):
if idx == 0:
left_col_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
else:
paragraph = left_col_frame.add_paragraph()
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
paragraph.level = an_item[1]
if 'heading' in double_col_content[1]:
right_heading.text = double_col_content[1]['heading']
if 'bullet_points' in double_col_content[1]:
flat_items_list = get_flat_list_of_contents(
double_col_content[1]['bullet_points'], level=0
)
for idx, an_item in enumerate(flat_items_list):
if idx == 0:
right_col_frame.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
else:
paragraph = right_col_frame.add_paragraph()
paragraph.text = an_item[0].removeprefix(STEP_BY_STEP_PROCESS_MARKER)
paragraph.level = an_item[1]
_handle_key_message(
the_slide=slide,
slide_json=slide_json,
slide_height_inch=slide_height_inch,
slide_width_inch=slide_width_inch
)
return True
return False
def _handle_step_by_step_process(
presentation: pptx.Presentation,
slide_json: dict,
slide_width_inch: float,
slide_height_inch: float
) -> bool:
"""
Add shapes to display a step-by-step process in the slide, if available.
:param presentation: The presentation object.
:param slide_json: The content of the slide as JSON data.
:param slide_width_inch: The width of the slide in inches.
:param slide_height_inch: The height of the slide in inches.
:return True if this slide has a step-by-step process depiction added; False otherwise.
"""
if 'bullet_points' in slide_json and slide_json['bullet_points']:
steps = slide_json['bullet_points']
no_marker_count = 0.0
n_steps = len(steps)
# Ensure that it is a single list of strings without any sub-list
for step in steps:
if not isinstance(step, str):
return False
# In some cases, one or two steps may not begin with >>, e.g.:
# {
# "heading": "Step-by-Step Process: Creating a Legacy",
# "bullet_points": [
# "Identify your unique talents and passions",
# ">> Develop your skills and knowledge",
# ">> Create meaningful work",
# ">> Share your work with the world",
# ">> Continuously learn and adapt"
# ],
# "key_message": ""
# },
#
# Use a threshold, e.g., at most 20%
if not step.startswith(STEP_BY_STEP_PROCESS_MARKER):
no_marker_count += 1
slide_header = slide_json['heading'].lower()
if (no_marker_count / n_steps > 0.25) and not (
('step-by-step' in slide_header) or ('step by step' in slide_header)
):
return False
bullet_slide_layout = presentation.slide_layouts[1]
slide = presentation.slides.add_slide(bullet_slide_layout)
shapes = slide.shapes
shapes.title.text = remove_slide_number_from_heading(slide_json['heading'])
if 3 <= n_steps <= 4:
# Horizontal display
height = INCHES_1_5
width = pptx.util.Inches(slide_width_inch / n_steps - 0.01)
top = pptx.util.Inches(slide_height_inch / 2)
left = pptx.util.Inches((slide_width_inch - width.inches * n_steps) / 2 + 0.05)
for step in steps:
shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.CHEVRON, left, top, width, height)
shape.text = step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
left += width - INCHES_0_4
elif 4 < n_steps <= 6:
# Vertical display
height = pptx.util.Inches(0.65)
top = pptx.util.Inches(slide_height_inch / 4)
left = INCHES_1 # slide_width_inch - width.inches)
# Find the close to median width, based on the length of each text, to be set
# for the shapes
width = pptx.util.Inches(slide_width_inch * 2 / 3)
lengths = [len(step) for step in steps]
font_size_20pt = pptx.util.Pt(20)
widths = sorted(
[
min(
pptx.util.Inches(font_size_20pt.inches * a_len),
width
) for a_len in lengths
]
)
width = widths[len(widths) // 2]
for step in steps:
shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.PENTAGON, left, top, width, height)
shape.text = step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
top += height + INCHES_0_3
left += INCHES_0_5
else:
# Two steps -- probably not a process
# More than 5--6 steps -- would likely cause a visual clutter
return False
return True
def _handle_key_message(
the_slide: pptx.slide.Slide,
slide_json: dict,
slide_width_inch: float,
slide_height_inch: float
):
"""
Add a shape to display the key message in the slide, if available.
:param the_slide: The slide to be processed.
:param slide_json: The content of the slide as JSON data.
:param slide_width_inch: The width of the slide in inches.
:param slide_height_inch: The height of the slide in inches.
"""
if 'key_message' in slide_json and slide_json['key_message']:
height = pptx.util.Inches(1.6)
width = pptx.util.Inches(slide_width_inch / 2.3)
top = pptx.util.Inches(slide_height_inch - height.inches - 0.1)
left = pptx.util.Inches((slide_width_inch - width.inches) / 2)
shape = the_slide.shapes.add_shape(
MSO_AUTO_SHAPE_TYPE.ROUNDED_RECTANGLE,
left=left,
top=top,
width=width,
height=height
)
shape.text = slide_json['key_message']
def _get_slide_width_height_inches(presentation: pptx.Presentation) -> Tuple[float, float]:
"""
Get the dimensions of a slide in inches.
:param presentation: The presentation object.
:return: The width and the height.
"""
slide_width_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_width
slide_height_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_height
# logger.debug('Slide width: %f, height: %f', slide_width_inch, slide_height_inch)
return slide_width_inch, slide_height_inch
def print_placeholder_names(slide: pptx.slide.Slide):
"""
Display the placeholder details of a given slide.
:param slide: The slide.
"""
for shape in slide.placeholders:
print(f'{shape.placeholder_format.idx=}, {shape.name=}')
if __name__ == '__main__':
_JSON_DATA = '''
{
"title": "Mastering PowerPoint Shapes",
"slides": [
{
"heading": "Introduction to PowerPoint Shapes",
"bullet_points": [
"Shapes are fundamental elements in PowerPoint",
"Used to create diagrams, flowcharts, and visuals",
"Available in various types: lines, rectangles, circles, etc."
],
"key_message": "",
"img_keywords": "PowerPoint shapes, basic shapes"
},
{
"heading": "Types of Shapes in PowerPoint",
"bullet_points": [
"Lines: Connect two points",
"Rectangles: Four-sided figures with right angles",
[
"Squares: Special type of rectangle with equal sides",
"Rounded Rectangles: Rectangles with rounded corners"
],
"Circles: Round shapes with no corners",
"Ovals: Elliptical shapes"
],
"key_message": "",
"img_keywords": "PowerPoint shapes, types of shapes"
},
{
"heading": "Creating and Manipulating Shapes",
"bullet_points": [
">> Select the 'Home' tab and click on 'Shapes'",
">> Choose the desired shape",
">> Click and drag to create the shape",
">> Resize, move, or rotate shapes using handles",
">> Change shape color, fill, and outline"
],
"key_message": "Demonstrates the process of creating and manipulating shapes",
"img_keywords": "PowerPoint shapes, creating shapes, manipulating shapes"
},
{
"heading": "Advanced Shape Manipulation",
"bullet_points": [
{
"heading": "Adding Text to Shapes",
"bullet_points": [
"Right-click on the shape and select 'Add Text'",
"Type or paste the desired text"
]
},
{
"heading": "Grouping and Ungrouping Shapes",
"bullet_points": [
"Select multiple shapes and press 'Ctrl + G' to group",
"Right-click and select 'Ungroup' to separate"
]
}
],
"key_message": "Explores advanced techniques for working with shapes",
"img_keywords": "PowerPoint shapes, advanced manipulation, grouping, text in shapes"
},
{
"heading": "Using the 'Format' Tab for Shapes",
"bullet_points": [
"Access advanced shape formatting options",
"Change shape fill, outline, and effects",
"Adjust shape size and position"
],
"key_message": "",
"img_keywords": "PowerPoint shapes, format tab, advanced formatting"
},
{
"heading": "Example: Creating a Simple Diagram",
"bullet_points": [
"Use rectangles to represent blocks",
"Use lines to connect blocks",
"Add text to shapes to label elements"
],
"key_message": "Illustrates the use of shapes to create a simple diagram",
"img_keywords": "PowerPoint shapes, diagram example, simple diagram"
},
{
"heading": "Example: Creating a Flowchart",
"bullet_points": [
"Use different shapes to represent steps, decisions, and inputs/outputs",
"Use connectors to link shapes",
"Add text to shapes to describe each step"
],
"key_message": "Demonstrates the use of shapes to create a flowchart",
"img_keywords": "PowerPoint shapes, flowchart example, creating flowchart"
},
{
"heading": "Double Column Layout: Shapes in Older vs. Newer PowerPoint Versions",
"bullet_points": [
{
"heading": "Older PowerPoint Versions",
"bullet_points": [
"Limited shape types and formatting options",
"Less intuitive shape creation and manipulation"
]
},
{
"heading": "Newer PowerPoint Versions",
"bullet_points": [
"Expanded shape library with more types and styles",
"Improved shape formatting and manipulation tools"
]
}
],
"key_message": "Compares the use of shapes in older and newer PowerPoint versions",
"img_keywords": "PowerPoint shapes, older versions, newer versions, comparison"
},
{
"heading": "Tips for Effective Use of Shapes",
"bullet_points": [
"Keep shapes simple and uncluttered",
"Use consistent colors and styles",
"Avoid overusing shapes, maintain balance with text and other elements"
],
"key_message": "Provides best practices for using shapes in presentations",
"img_keywords": "PowerPoint shapes, best practices, effective use"
},
{
"heading": "Conclusion",
"bullet_points": [
"Shapes are versatile tools in PowerPoint",
"Mastering shapes enhances presentation visuals",
"Practice and experimentation are key to improving shape usage"
],
"key_message": "Summarizes the importance of shapes in PowerPoint and encourages practice",
"img_keywords": "PowerPoint shapes, conclusion, importance"
}
]
}'''
temp = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx')
path = pathlib.Path(temp.name)
generate_powerpoint_presentation(
json5.loads(_JSON_DATA),
output_file_path=path,
slides_template='Basic'
)
print(f'File path: {path}')
temp.close()