Spaces:

Davidsamuel101
/

PPTGenerator

Runtime error

Davidsamuel101 committed on Apr 28, 2023

Commit

5f21add

•

1 Parent(s): 189657b

Fix get_slides in text_extractor.py

Files changed (3) hide show

__pycache__/app.cpython-38.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-38.pyc and b/__pycache__/app.cpython-38.pyc differ

__pycache__/text_extractor.cpython-38.pyc CHANGED Viewed

Binary files a/__pycache__/text_extractor.cpython-38.pyc and b/__pycache__/text_extractor.cpython-38.pyc differ

text_extractor.py CHANGED Viewed

@@ -117,19 +117,16 @@ class TextExtractor:
                     # Remove tag and pipes from the text
                     section.append((tag, re.sub(r'<.*?>|\|', '', text).strip()))
                 elif tag.startswith('p'):
-                    text = re.split("((\|){2,})", text)
                     for paragraph in text:
-                        paragraph = re.sub(r'<.*?>|\|', '', paragraph).strip()
-                        if paragraph and paragraph[0].islower(): # If a parggraph in a different block is found and the first character isn't an uppercase then concanate with last paragraph
-                            my_list = list(section[-1])
-                            my_list[1] += f" {paragraph}"
-                            my_tuple = tuple(my_list)
-                            section[-1] = my_tuple # Append back the concatenated paragraph back to the section
-                        elif paragraph:
-                            paragraph = re.sub(' +', ' ', paragraph) # Replace any double space in the paragraph
-                            section.append((tag, paragraph))
                 try:
-                    if re.search(r'(?<=<)(.*?)(?=>)', text).group() == 'h1': # Create new page when current text is a tpye 1 header or title
                         slides[f"Page {page}"] = section
                         page += 1
                 except:

                     # Remove tag and pipes from the text
                     section.append((tag, re.sub(r'<.*?>|\|', '', text).strip()))
                 elif tag.startswith('p'):
+                    text = re.split("((\|){2,})", text) # If encounter more than 1 pipe than split that text into different paragraphs
                     for paragraph in text:
+                        paragraph = re.sub(r'<.*?>|\|', '', paragraph).strip() # Remove any pipe
+                        paragraph = re.sub(' +', ' ', paragraph) # Remove any double or more spaces into single space
+                        if paragraph and paragraph[0].islower(): # If a pargraph in a different block is found and the first character isn't an uppercase then concanate with last paragraph
+                            section[-1][1] += f" {paragraph}"
+                        elif paragraph:
+                            section.append([tag, paragraph])
                 try:
+                    if tag_match.group() == 'h1': # Create new page when current text is a type 1 header or title
                         slides[f"Page {page}"] = section
                         page += 1
                 except: