saq1b committed on
Commit
e4259c0
1 Parent(s): ff00436

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,7 +7,7 @@ import io
7
  import edge_tts
8
  import asyncio
9
  import aiofiles
10
- import PyPDF2
11
  import os
12
  from typing import List, Dict, Tuple
13
 
@@ -277,8 +277,8 @@ class TextExtractor:
277
  async def extract_from_pdf(file_path: str) -> str:
278
  async with aiofiles.open(file_path, 'rb') as file:
279
  content = await file.read()
280
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
281
- return " ".join(page.extract_text() for page in pdf_reader.pages)
282
 
283
  @staticmethod
284
  async def extract_from_txt(file_path: str) -> str:
 
7
  import edge_tts
8
  import asyncio
9
  import aiofiles
10
+ import pypdf
11
  import os
12
  from typing import List, Dict, Tuple
13
 
 
277
  async def extract_from_pdf(file_path: str) -> str:
278
  async with aiofiles.open(file_path, 'rb') as file:
279
  content = await file.read()
280
+ pdf_reader = pypdf.PdfReader(io.BytesIO(content))
281
+ return "\n\n".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
282
 
283
  @staticmethod
284
  async def extract_from_txt(file_path: str) -> str: