Hyeonseo committed on
Commit
31ea449
•
1 Parent(s): 0e60cbc

revise: PPT script reader

Browse files
Files changed (1) hide show
  1. app.py +23 -7
app.py CHANGED
@@ -6,6 +6,7 @@ import time
6
  import numpy as np
7
  import pandas as pd
8
  import PyPDF2
 
9
  import openai
10
  import subprocess
11
 
@@ -117,16 +118,31 @@ def text2ppt(token_key, input_prompt, input_theme):
117
  def ppt2script(token_key, input_file, input_type):
118
  openai.api_key = token_key
119
 
120
- with open(input_file, 'rb') as pdf_file:
121
- pdf_reader = PyPDF2.PdfReader(pdf_file)
122
- num_pages = len(pdf_reader.pages)
 
 
 
 
 
 
 
 
 
 
123
 
124
- # 각 페이지의 내용을 문자열로 변환합니다.
125
  text = ""
126
- for page_num in range(num_pages):
127
- page = pdf_reader.pages[page_num]
128
  text += "[PAGE_NUM " + str(page_num + 1) + "]"
129
- text += page.extract_text()
 
 
 
 
 
 
130
 
131
  header = """
132
  너는는 PPT 발표에 도움을 주는 조력자야.
 
6
  import numpy as np
7
  import pandas as pd
8
  import PyPDF2
9
+ from pptx import Presentation
10
  import openai
11
  import subprocess
12
 
 
118
  def ppt2script(token_key, input_file, input_type):
119
  openai.api_key = token_key
120
 
121
+ if input_type=="PDF":
122
+ with open(input_file, 'rb') as pdf_file:
123
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
124
+ num_pages = len(pdf_reader.pages)
125
+
126
+ # 각 페이지의 내용을 문자열로 변환합니다.
127
+ text = ""
128
+ for page_num in range(num_pages):
129
+ page = pdf_reader.pages[page_num]
130
+ text += "[PAGE_NUM " + str(page_num + 1) + "]"
131
+ text += page.extract_text()
132
+ else:
133
+ prs = Presentation(path_to_presentation)
134
 
 
135
  text = ""
136
+ page_num = 0
137
+ for slide in prs.slides:
138
  text += "[PAGE_NUM " + str(page_num + 1) + "]"
139
+ page_num += 1
140
+ for shape in slide.shapes:
141
+ if not shape.has_text_frame:
142
+ continue
143
+ for paragraph in shape.text_frame.paragraphs:
144
+ for run in paragraph.runs:
145
+ text += run.text
146
 
147
  header = """
148
  너는는 PPT 발표에 도움을 주는 조력자야.