ibuilder / pdfextractor.py
Soufianesejjari's picture
working
7c71548
raw
history blame contribute delete
833 Bytes
import PyPDF2
import io
import os
from dotenv import load_dotenv
import groq
import streamlit as st
from typing import List, Dict, Optional
from pydantic import BaseModel
# Load settings from a local .env file (python-dotenv) when this module is
# imported, so later environment lookups can see them.
load_dotenv()
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: The path to the PDF file.

    Returns:
        The text of all pages concatenated in page order. Returns an empty
        string if the file cannot be opened or extraction fails.
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Treat a page whose extract_text() gives back None as empty, so
            # one such page does not discard the text of the whole document.
            page_texts = [page.extract_text() or "" for page in reader.pages]
    except Exception as e:
        # Best-effort contract: report the problem and return an empty
        # string instead of propagating the error to the caller.
        print(f"Error extracting text from PDF: {e}")
        return ""  # Return empty string on failure
    return "".join(page_texts)