Spaces:
Running
Running
import PyPDF2 | |
import io | |
import os | |
from dotenv import load_dotenv | |
import groq | |
import streamlit as st | |
from typing import List, Dict, Optional | |
from pydantic import BaseModel | |
load_dotenv() | |
def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF file.

    Args:
        pdf_path (str): The path to the PDF file.

    Returns:
        str: The text of every page, concatenated in page order. Returns an
        empty string if the file cannot be opened or extraction fails.
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Extraction happens inside the `with` block, while `file` is
            # still open. A page whose extract_text() yields nothing
            # (None or "") contributes an empty string, so one blank page
            # no longer aborts the whole document with a TypeError.
            page_texts = [page.extract_text() or "" for page in reader.pages]
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall back
        # to an empty result instead of propagating to the caller.
        print(f"Error extracting text from PDF: {e}")
        return ""  # Return empty string on failure
    # Single join instead of repeated `+=` on a growing string.
    return "".join(page_texts)