Spaces:
Running
Running
File size: 1,232 Bytes
2542be6 27537e8 2542be6 b8d1cbb 2542be6 b8d1cbb 2542be6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import nltk
import nltk.downloader
import spacy
from core.config import settings
from pathlib import Path
def initialize_nlp():
    """Eagerly download the NLTK data and load the spaCy model.

    The NLTK packages listed below are downloaded into ``/tmp/nltk_data``,
    and that directory is added to ``nltk.data.path`` so NLTK searches it.
    The spaCy model named by ``settings.SPACY_MODEL`` is then loaded and
    stored in the module-level ``nlp`` so that ``get_nlp()`` reuses it
    instead of loading the model a second time.

    Progress is reported with ``print``. Download failures are not raised
    (downloads run in quiet mode); they only prevent ``nltk_initialized``
    from being set, so ``get_nltk()`` will still attempt its own download.
    """
    global nlp, nltk_initialized

    print("Initializing NLP resources...")

    # NOTE(review): /tmp is presumably used because the default NLTK data
    # directory is not writable in the deployment environment - confirm.
    nltk_data_path = Path("/tmp/nltk_data")
    nltk_data_path.mkdir(parents=True, exist_ok=True)
    # Avoid piling up duplicate search-path entries if this runs twice.
    if str(nltk_data_path) not in nltk.data.path:
        nltk.data.path.append(str(nltk_data_path))

    # Download NLTK resources
    nltk_resources = [
        'maxent_ne_chunker',
        'words',
        'treebank',
        'maxent_treebank_pos_tagger',
        'punkt',
        'averaged_perceptron_tagger',
    ]
    all_downloaded = True
    for resource in nltk_resources:
        succeeded = nltk.downloader.download(
            resource, download_dir=str(nltk_data_path), quiet=True
        )
        if not succeeded:
            all_downloaded = False

    # The list above includes everything get_nltk() fetches, so when every
    # download succeeded there is nothing left for get_nltk() to download.
    if all_downloaded:
        nltk_initialized = True

    # Load spaCy model and keep it; previously the loaded pipeline was
    # discarded and get_nlp() had to load it again.
    nlp = spacy.load(settings.SPACY_MODEL)

    print("NLP resources initialized successfully.")
# Module-level state shared by the accessor functions in this file.
# Flag that becomes True once the NLTK data download has been attempted.
nltk_initialized = False
# Loaded spaCy pipeline; stays None until a model has been loaded.
nlp = None
def get_nlp():
    """Return the shared spaCy pipeline, loading it on first use.

    The pipeline is loaded from ``settings.SPACY_MODEL`` the first time it
    is needed and kept in the module-level ``nlp`` for later calls.
    """
    global nlp
    # Fast path: a pipeline has already been loaded and cached.
    if nlp is not None:
        return nlp
    nlp = spacy.load(settings.SPACY_MODEL)
    return nlp
def get_nltk():
    """Return the ``nltk`` module, fetching tokenizer/tagger data on first use.

    On the first call (while ``nltk_initialized`` is False) the ``punkt``
    and ``averaged_perceptron_tagger`` packages are downloaded into
    ``/tmp/nltk_data`` - the same directory ``initialize_nlp()`` uses - and
    that directory is added to ``nltk.data.path``. Downloads run in quiet
    mode and failures are not raised; the flag is set after the attempt so
    later calls do not hit the network again.
    """
    global nltk_initialized
    if not nltk_initialized:
        # Keep the download location consistent with initialize_nlp()
        # rather than falling back to NLTK's default data directory.
        nltk_data_path = Path("/tmp/nltk_data")
        nltk_data_path.mkdir(parents=True, exist_ok=True)
        if str(nltk_data_path) not in nltk.data.path:
            nltk.data.path.append(str(nltk_data_path))

        for resource in ('punkt', 'averaged_perceptron_tagger'):
            nltk.download(
                resource, download_dir=str(nltk_data_path), quiet=True
            )
        nltk_initialized = True
    return nltk