Spaces:

barunsaha
/

slide-deck-ai

Running

App Files Files Community

slide-deck-ai / helpers /pptx_helper.py

barunsaha

Fix problems with prior commits

e065b20 3 months ago

raw

history blame

No virus

11.8 kB

	import logging
	import pathlib
	import re
	import tempfile

	from typing import List, Tuple

	import json5
	import pptx
	from pptx.enum.shapes import MSO_AUTO_SHAPE_TYPE

	from global_config import GlobalConfig


	# English Metric Unit (used by PowerPoint) to inches
	# Multiplying a length in EMU by this factor gives the length in inches (914400 EMU = 1 inch)
	EMU_TO_INCH_SCALING_FACTOR = 1.0 / 914400
	# Pre-built lengths used for the geometry of the process shapes drawn by this module
	INCHES_1_5 = pptx.util.Inches(1.5)
	INCHES_1 = pptx.util.Inches(1)
	INCHES_0_5 = pptx.util.Inches(0.5)
	INCHES_0_4 = pptx.util.Inches(0.4)
	INCHES_0_3 = pptx.util.Inches(0.3)

	# Prefix that every bullet point of a slide must carry for the slide to be drawn as a
	# step-by-step process; it is removed from the text before the text is displayed
	STEP_BY_STEP_PROCESS_MARKER = '>> '

	# Matches headings that begin with a "slide <number>:" label (case-insensitive)
	PATTERN = re.compile(r"^slide[ ]+\d+:", re.IGNORECASE)
	# A minimal example of the expected input: a title plus a list of slides, each having a heading
	# and (possibly nested) bullet points
	# NOTE(review): not referenced anywhere in the visible part of this module -- presumably used by
	# other modules; confirm before removing
	SAMPLE_JSON_FOR_PPTX = '''
	{
	"title": "Understanding AI",
	"slides": [
	{
	"heading": "Introduction",
	"bullet_points": [
	"Brief overview of AI",
	[
	"Importance of understanding AI"
	]
	]
	}
	]
	}
	'''

	# Module-level logger, named after this module
	logger = logging.getLogger(__name__)


	def remove_slide_number_from_heading(header: str) -> str:
	    """
	    Remove the slide number from a given slide header.

	    A leading label of the form "Slide <number>:" (matched case-insensitively by `PATTERN`) is
	    dropped, together with the whitespace that separates it from the actual heading. A header
	    without such a label is returned unchanged.

	    :param header: The header of a slide.
	    :return: The header without the slide number label.
	    """

	    if PATTERN.match(header):
	        # The pattern guarantees a colon; keep only what follows the first one and trim the
	        # blank(s) left behind, so that "Slide 1: Intro" yields "Intro" rather than " Intro"
	        header = header.partition(':')[2].strip()

	    return header


	def generate_powerpoint_presentation(
	        structured_data: str,
	        slides_template: str,
	        output_file_path: pathlib.Path
	) -> List:
	    """
	    Create and save a PowerPoint presentation file containing the content in JSON format.

	    The deck is made of a title slide, one slide for each element of the `slides` array of the
	    input, and a closing "Thank you!" slide. A content slide is drawn as a step-by-step process
	    when its bullet points qualify for it and as a plain bulleted list otherwise; a key message,
	    if present, is added on top of either.

	    :param structured_data: The presentation contents as "JSON" (may contain trailing commas).
	    :param slides_template: The PPTX template to use; a key of `GlobalConfig.PPTX_TEMPLATE_FILES`.
	    :param output_file_path: The path of the PPTX file to save as.
	    :return: A list of presentation title and slides headers.
	    """

	    # The structured "JSON" might contain trailing commas, so using json5
	    parsed_data = json5.loads(structured_data)

	    template_file = GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']
	    logger.debug('*** Using PPTX template: %s', template_file)
	    presentation = pptx.Presentation(template_file)

	    slide_width_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_width
	    slide_height_inch = EMU_TO_INCH_SCALING_FACTOR * presentation.slide_height
	    logger.debug('Slide width: %f, height: %f', slide_width_inch, slide_height_inch)

	    # The title slide
	    title_slide_layout = presentation.slide_layouts[0]
	    slide = presentation.slides.add_slide(title_slide_layout)
	    title = slide.shapes.title
	    subtitle = slide.placeholders[1]
	    title.text = parsed_data['title']
	    logger.info(
	        'PPT title: %s | #slides: %d',
	        title.text, len(parsed_data['slides'])
	    )
	    subtitle.text = 'by Myself and SlideDeck AI :)'
	    all_headers = [title.text, ]

	    # The same layout is used for all the content slides
	    bullet_slide_layout = presentation.slide_layouts[1]

	    # Add contents in a loop
	    for a_slide in parsed_data['slides']:
	        slide = presentation.slides.add_slide(bullet_slide_layout)

	        # Fall back to the plain bulleted list when the slide is not a step-by-step process
	        if not _handle_step_by_step_process(
	                slide=slide,
	                slide_json=a_slide,
	                slide_width_inch=slide_width_inch,
	                slide_height_inch=slide_height_inch,
	        ):
	            _handle_default_display(slide, a_slide)

	        _handle_key_message(
	            slide=slide,
	            slide_json=a_slide,
	            slide_width_inch=slide_width_inch,
	            slide_height_inch=slide_height_inch,
	        )

	        # Record the heading in the same form in which it is shown on the slide
	        all_headers.append(remove_slide_number_from_heading(a_slide['heading']))

	    # The thank-you slide
	    last_slide_layout = presentation.slide_layouts[0]
	    slide = presentation.slides.add_slide(last_slide_layout)
	    title = slide.shapes.title
	    title.text = 'Thank you!'

	    presentation.save(output_file_path)

	    return all_headers


	def get_flat_list_of_contents(items: list, level: int) -> List[Tuple]:
	    """
	    Flatten a (hierarchical) list of bullet points to a single list containing each item and
	    its level.

	    The items are visited in order. A string is emitted at the current level, whereas a nested
	    list is flattened recursively one level deeper. Items of any other type are skipped.

	    :param items: A bullet point (string or list).
	    :param level: The current level of hierarchy.
	    :return: A list of (bullet item text, hierarchical level) tuples.
	    """

	    flat_list = []

	    for item in items:
	        if isinstance(item, str):
	            flat_list.append((item, level))
	        elif isinstance(item, list):
	            # Extend in place rather than concatenating, which would copy the whole
	            # accumulated list again for every nested list
	            flat_list.extend(get_flat_list_of_contents(item, level + 1))

	    return flat_list


	def _handle_default_display(
	        slide: pptx.slide.Slide,
	        slide_json: dict,
	):
	    """
	    Display a list of text in a slide.

	    The heading (without any slide number label) becomes the slide title, and the bullet points
	    are written to the placeholder at index 1, one paragraph per item, indented according to
	    the depth of the item in the nested lists. A slide without any bullet points only gets
	    its title.

	    :param slide: The slide to be processed.
	    :param slide_json: The content of the slide as JSON data.
	    """

	    shapes = slide.shapes
	    title_shape = shapes.title
	    body_shape = shapes.placeholders[1]
	    title_shape.text = remove_slide_number_from_heading(slide_json['heading'])
	    text_frame = body_shape.text_frame

	    # The bullet_points may contain a nested hierarchy of JSON arrays
	    # In some scenarios, it may contain objects (dictionaries) because the LLM generated so
	    # ^ The second scenario is not covered

	    # A missing (or null) list of bullet points is treated as an empty one
	    flat_items_list = get_flat_list_of_contents(slide_json.get('bullet_points') or [], level=0)

	    for idx, (text, level) in enumerate(flat_items_list):
	        # A text frame always contains one paragraph to begin with; fill it with the first item
	        # instead of adding a new paragraph, which would leave an empty bullet at the top
	        paragraph = text_frame.paragraphs[0] if idx == 0 else text_frame.add_paragraph()
	        paragraph.text = text.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
	        paragraph.level = level


	def _handle_step_by_step_process(
	        slide: pptx.slide.Slide,
	        slide_json: dict,
	        slide_width_inch: float,
	        slide_height_inch: float
	) -> bool:
	    """
	    Add shapes to display a step-by-step process in the slide, if available.

	    A slide depicts a process when its bullet points form a flat list of at least three strings,
	    each one beginning with `STEP_BY_STEP_PROCESS_MARKER`. Three or four steps are laid out
	    horizontally as overlapping chevrons; five or more steps are stacked vertically as
	    pentagons, each one shifted to the right of the previous one. The slide is left untouched
	    when False is returned.

	    :param slide: The slide to be processed.
	    :param slide_json: The content of the slide as JSON data.
	    :param slide_width_inch: The width of the slide in inches.
	    :param slide_height_inch: The height of the slide in inches.
	    :return: True if this slide has a step-by-step process depiction; False otherwise.
	    """

	    steps = slide_json.get('bullet_points')

	    if not steps:
	        # No bullet points, so no process: say so explicitly, else the caller would skip the
	        # default display and the slide would be left without its title
	        return False

	    # Ensure that it is a single list of marked strings without any sub-list
	    if not all(
	            isinstance(step, str) and step.startswith(STEP_BY_STEP_PROCESS_MARKER)
	            for step in steps
	    ):
	        return False

	    n_steps = len(steps)

	    if n_steps < 3:
	        # One or two steps -- probably not a process
	        return False

	    shapes = slide.shapes
	    shapes.title.text = remove_slide_number_from_heading(slide_json['heading'])

	    if n_steps <= 4:
	        # Horizontal display
	        height = INCHES_1_5
	        width = pptx.util.Inches(slide_width_inch / n_steps - 0.01)
	        top = pptx.util.Inches(slide_height_inch / 2)
	        left = pptx.util.Inches((slide_width_inch - width.inches * n_steps) / 2 + 0.05)

	        for step in steps:
	            shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.CHEVRON, left, top, width, height)
	            shape.text = step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
	            # Advance by less than a full width, so that consecutive chevrons overlap
	            left += width - INCHES_0_4
	    else:
	        # Vertical display
	        height = pptx.util.Inches(0.65)
	        width = pptx.util.Inches(slide_width_inch * 2 / 3)
	        top = pptx.util.Inches(slide_height_inch / 4)
	        left = INCHES_1

	        for step in steps:
	            shape = shapes.add_shape(MSO_AUTO_SHAPE_TYPE.PENTAGON, left, top, width, height)
	            shape.text = step.removeprefix(STEP_BY_STEP_PROCESS_MARKER)
	            # Place the next step below the current one, with a gap, and shifted to the right
	            top += height + INCHES_0_3
	            left += INCHES_0_5

	    return True


	def _handle_key_message(
	        slide: pptx.slide.Slide,
	        slide_json: dict,
	        slide_width_inch: float,
	        slide_height_inch: float
	):
	    """
	    Add a shape to display the key message in the slide, if available.

	    The message is shown in a rounded rectangle that is centered horizontally and placed just
	    above the bottom edge of the slide. Nothing is added when the slide has no (or an empty)
	    key message.

	    :param slide: The slide to be processed.
	    :param slide_json: The content of the slide as JSON data.
	    :param slide_width_inch: The width of the slide in inches.
	    :param slide_height_inch: The height of the slide in inches.
	    """

	    key_message = slide_json.get('key_message')

	    if not key_message:
	        return

	    # Box geometry: a fixed height and a width proportional to that of the slide
	    box_height = pptx.util.Inches(1.6)
	    box_width = pptx.util.Inches(slide_width_inch / 2.3)
	    # Leave a small margin below the box and center it horizontally
	    box_top = pptx.util.Inches(slide_height_inch - box_height.inches - 0.1)
	    box_left = pptx.util.Inches((slide_width_inch - box_width.inches) / 2)

	    message_box = slide.shapes.add_shape(
	        MSO_AUTO_SHAPE_TYPE.ROUNDED_RECTANGLE,
	        left=box_left,
	        top=box_top,
	        width=box_width,
	        height=box_height
	    )
	    message_box.text = key_message


	if __name__ == '__main__':
	    # Demo: render a sample deck to a temporary .pptx file and print where it was saved.
	    # The sample intentionally contains a trailing comma, which json5 accepts.
	    _JSON_DATA = '''
	{
	"title": "Understanding AI",
	"slides": [
	{
	"heading": "Introduction",
	"bullet_points": [
	"Brief overview of AI",
	[
	"Importance of understanding AI"
	]
	],
	"key_message": ""
	},
	{
	"heading": "What is AI?",
	"bullet_points": [
	"Definition of AI",
	[
	"Types of AI",
	[
	"Narrow or weak AI",
	"General or strong AI"
	]
	],
	"Differences between AI and machine learning"
	],
	"key_message": ""
	},
	{
	"heading": "How AI Works",
	"bullet_points": [
	"Overview of AI algorithms",
	[
	"Types of AI algorithms",
	[
	"Rule-based systems",
	"Decision tree systems",
	"Neural networks"
	]
	],
	"How AI processes data"
	],
	"key_message": ""
	},
	{
	"heading": "Building AI Models",
	"bullet_points": [
	">> Collect data",
	">> Select model or architecture to use",
	">> Set appropriate parameters",
	">> Train model with data",
	">> Run inference",
	],
	"key_message": ""
	},
	{
	"heading": "Pros of AI",
	"bullet_points": [
	"Increased efficiency and productivity",
	"Improved accuracy and precision",
	"Enhanced decision-making capabilities",
	"Personalized experiences"
	],
	"key_message": "AI can be used for many different purposes"
	},
	{
	"heading": "Cons of AI",
	"bullet_points": [
	"Job displacement and loss of employment",
	"Bias and discrimination",
	"Privacy and security concerns",
	"Dependence on technology"
	],
	"key_message": ""
	},
	{
	"heading": "Future Prospects of AI",
	"bullet_points": [
	"Advancements in fields such as healthcare and finance",
	"Increased use"
	],
	"key_message": ""
	}
	]
	}'''

	    # Only the unique file name is needed: close the handle right away (the file is kept because
	    # of delete=False), since a temporary file that is still open cannot be opened a second
	    # time by its name on every platform
	    with tempfile.NamedTemporaryFile(delete=False, suffix='.pptx') as temp:
	        path = pathlib.Path(temp.name)

	    # Pass the raw text: generate_powerpoint_presentation() parses it with json5 on its own
	    generate_powerpoint_presentation(
	        _JSON_DATA,
	        output_file_path=path,
	        slides_template='Basic'
	    )
	    print(f'File path: {path}')