# Benjamin Bossan
# Add pdf processor using pypdf
# 4c2b75c
import os
from pathlib import Path
from pydantic import BaseSettings
from pydantic.types import PositiveInt
class Config(BaseSettings):
    """Settings model holding the application's tunable values and defaults.

    Every field below carries a default, so the model can be instantiated
    without arguments. The nested ``Config`` class selects which .env file
    is used.
    """

    # Hugging Face Hub token; "missing" is the placeholder default.
    hf_hub_token: str = "missing"
    # Inference endpoint for the agent; defaults to the hosted StarCoder model.
    hf_agent: str = "https://api-inference.huggingface.co/models/bigcode/starcoder"
    # Database file; presumably a SQLite file, judging by the default name.
    db_file_name: Path = Path("sqlite-data.db")

    # Upper bound on processed text, measured in characters.
    processing_max_length: PositiveInt = 10000
    # Sampling rate used for audio transcription.
    sampling_rate: PositiveInt = 16_000
    # Documented as "in minutes".
    # NOTE(review): 1800 minutes is 30 hours; confirm the unit is not seconds.
    max_yt_length: PositiveInt = 1800

    # Case variants of the "acknowledgments" and "references" headings;
    # presumably markers at which PDF processing stops; confirm with the
    # PDF processor that consumes this list.
    pdf_stop_words: list[str] = [
        "ACKNOWLEDGMENTS", "Acknowledgments", "acknowledgments",
        "REFERENCES", "References", "references",
    ]

    class Config:
        # Load the ".env" file by default, with a provision to use another
        # .env file when the ENV_FILE environment variable is set. The
        # variable is read once, when this class body is executed.
        env_file = os.getenv("ENV_FILE", ".env")
# Module-level cache for the settings object; stays None until the first
# call to get_config().
_config = None


def get_config() -> Config:
    """Return the shared ``Config`` instance, creating it on first use.

    The instance is stored in the module-level ``_config`` variable, so
    every later call returns the same object without building a new one.
    """
    global _config
    settings = _config
    if settings is None:
        # First call: build the settings once and remember them.
        settings = _config = Config()
    return settings