Spaces:

barunsaha
/

slide-deck-ai

Running

File size: 7,410 Bytes

import pathlib
import tempfile
from typing import List, Tuple
import json5
import logging
import pptx
import re
import yaml

from global_config import GlobalConfig


PATTERN = re.compile(r"^slide[ ]+\d+:", re.IGNORECASE)
SAMPLE_JSON_FOR_PPTX = '''
{
    "title": "Understanding AI",
    "slides": [
        {
            "heading": "Introduction",
            "bullet_points": [
                "Brief overview of AI",
                [
                    "Importance of understanding AI"
                ]
            ]
        }
    ]
}
'''

logging.basicConfig(
    level=GlobalConfig.LOG_LEVEL,
    format='%(asctime)s - %(message)s',
)


def remove_slide_number_from_heading(header: str) -> str:
    """
    Remove the slide number from a given slide header.

    :param header: The header of a slide
    """

    if PATTERN.match(header):
        idx = header.find(':')
        header = header[idx + 1:]

    return header


def generate_powerpoint_presentation(
        structured_data: str,
        as_yaml: bool,
        slides_template: str,
        output_file_path: pathlib.Path
) -> List:
    """
    Create and save a PowerPoint presentation file containing the contents in JSON or YAML format.

    :param structured_data: The presentation contents as "JSON" (may contain trailing commas) or YAML
    :param as_yaml: True if the input data is in YAML format; False if it is in JSON format
    :param slides_template: The PPTX template to use
    :param output_file_path: The path of the PPTX file to save as
    :return A list of presentation title and slides headers
    """

    if as_yaml:
        # Avoid YAML mode: nested bullets can lead to incorrect YAML generation
        try:
            parsed_data = yaml.safe_load(structured_data)
        except yaml.parser.ParserError as ype:
            logging.error(f'*** YAML parse error: {ype}')
            parsed_data = {'title': '', 'slides': []}
    else:
        # The structured "JSON" might contain trailing commas, so using json5
        parsed_data = json5.loads(structured_data)

    logging.debug(f"*** Using PPTX template: {GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file']}")
    presentation = pptx.Presentation(GlobalConfig.PPTX_TEMPLATE_FILES[slides_template]['file'])

    # The title slide
    title_slide_layout = presentation.slide_layouts[0]
    slide = presentation.slides.add_slide(title_slide_layout)
    title = slide.shapes.title
    subtitle = slide.placeholders[1]
    title.text = parsed_data['title']
    logging.debug(f'Title is: {title.text}')
    subtitle.text = 'by Myself and SlideDeck AI :)'
    all_headers = [title.text, ]

    # background = slide.background
    # background.fill.solid()
    # background.fill.fore_color.rgb = RGBColor.from_string('C0C0C0')  # Silver
    # title.text_frame.paragraphs[0].font.color.rgb = RGBColor(0, 0, 128)  # Navy blue

    # Add contents in a loop
    for a_slide in parsed_data['slides']:
        bullet_slide_layout = presentation.slide_layouts[1]
        slide = presentation.slides.add_slide(bullet_slide_layout)
        shapes = slide.shapes

        title_shape = shapes.title
        body_shape = shapes.placeholders[1]
        title_shape.text = remove_slide_number_from_heading(a_slide['heading'])
        all_headers.append(title_shape.text)
        text_frame = body_shape.text_frame

        # The bullet_points may contain a nested hierarchy of JSON arrays
        # In some scenarios, it may contain objects (dictionaries) because the LLM generated so
        #  ^ The second scenario is not covered

        flat_items_list = get_flat_list_of_contents(a_slide['bullet_points'], level=0)

        for an_item in flat_items_list:
            paragraph = text_frame.add_paragraph()
            paragraph.text = an_item[0]
            paragraph.level = an_item[1]

    # The thank-you slide
    last_slide_layout = presentation.slide_layouts[0]
    slide = presentation.slides.add_slide(last_slide_layout)
    title = slide.shapes.title
    title.text = 'Thank you!'

    presentation.save(output_file_path)

    return all_headers


def get_flat_list_of_contents(items: list, level: int) -> List[Tuple]:
    """
    Flatten a (hierarchical) list of bullet points to a single list containing each item and its level.

    :param items: A bullet point (string or list)
    :param level: The current level of hierarchy
    :return: A list of (bullet item text, hierarchical level) tuples
    """

    flat_list = []

    for item in items:
        if isinstance(item, str):
            flat_list.append((item, level))
        elif isinstance(item, list):
            flat_list = flat_list + get_flat_list_of_contents(item, level + 1)

    return flat_list


if __name__ == '__main__':
    # bullets = [
    #     'Description',
    #     'Types',
    #     [
    #         'Type A',
    #         'Type B'
    #     ],
    #     'Grand parent',
    #     [
    #         'Parent',
    #         [
    #             'Grand child'
    #         ]
    #     ]
    # ]

    # output = get_flat_list_of_contents(bullets, level=0)
    # for x in output:
    #     print(x)

    json_data = '''
    {
    "title": "Understanding AI",
    "slides": [
        {
            "heading": "Introduction",
            "bullet_points": [
                "Brief overview of AI",
                [
                    "Importance of understanding AI"
                ]
            ]
        },
        {
            "heading": "What is AI?",
            "bullet_points": [
                "Definition of AI",
                [
                    "Types of AI",
                    [
                        "Narrow or weak AI",
                        "General or strong AI"
                    ]
                ],
                "Differences between AI and machine learning"
            ]
        },
        {
            "heading": "How AI Works",
            "bullet_points": [
                "Overview of AI algorithms",
                [
                    "Types of AI algorithms",
                    [
                        "Rule-based systems",
                        "Decision tree systems",
                        "Neural networks"
                    ]
                ],
                "How AI processes data"
            ]
        },
        {
            "heading": "Pros of AI",
            "bullet_points": [
                "Increased efficiency and productivity",
                "Improved accuracy and precision",
                "Enhanced decision-making capabilities",
                "Personalized experiences"
            ]
        },
        {
            "heading": "Cons of AI",
            "bullet_points": [
                "Job displacement and loss of employment",
                "Bias and discrimination",
                "Privacy and security concerns",
                "Dependence on technology"
            ]
        },
        {
            "heading": "Future Prospects of AI",
            "bullet_points": [
                "Advancements in fields such as healthcare and finance",
                "Increased use"
            ]
        }
    ]
}'''

    temp = tempfile.NamedTemporaryFile(delete=False, suffix='.pptx')
    path = pathlib.Path(temp.name)

    generate_powerpoint_presentation(
        json5.loads(json_data),
        as_yaml=False,
        output_file_path=path,
        slides_template='Blank'
    )