ClinicalTrialBasics / postBuild
essprasad's picture
Upload 10 files
7829d29 verified
#!/bin/bash
# Post-build hook for the CT-Chat Space: reinstalls two Python packages,
# preloads NLTK data, clears caches and prepares /tmp/hf_cache.
#
# Strict mode: -e aborts on the first failing command, -u turns the use of an
# unset variable into an error, and pipefail makes a pipeline fail as soon as
# any of its stages fails.
set -euo pipefail
# NOTE(review): the leading symbols and the dash in this message look
# mis-encoded (mojibake); the string is left untouched — confirm the intended
# glyphs before changing it.
echo "πŸ”§ PostBuild starting β€” optimizing CT-Chat Space..."
# -------------------------------------------------------
# 1️⃣ Fix dependency mismatches (Gradio & Websockets)
# -------------------------------------------------------
# --force-reinstall replaces whatever versions are already installed;
# --no-cache-dir keeps pip from using or writing its download cache.
pip install --force-reinstall --no-cache-dir "websockets>=12" "gradio-client>=1.3.0"
# -------------------------------------------------------
# 2️⃣ Create and register shared NLTK data directory
# -------------------------------------------------------
# NOTE(review): the leading symbols in this message look mis-encoded
# (mojibake); the string is left untouched — confirm the intended glyph.
echo "πŸ“ Preparing shared NLTK data directory..."
# Exported so the python processes started later in this script inherit it.
export NLTK_DATA="/usr/local/share/nltk_data"
# The expansion is quoted so the path is always passed as exactly one
# argument, and "--" ends option parsing in front of it.
mkdir -p -- "$NLTK_DATA"
# Recursively world-writable.
# NOTE(review): presumably needed so a different runtime user can write here;
# confirm that permissions this broad are really required.
chmod -R 777 -- "$NLTK_DATA"
# -------------------------------------------------------
# 3️⃣ Preload all required NLTK resources (including punkt_tab)
# -------------------------------------------------------
# NOTE(review): the leading symbols in this message look mis-encoded
# (mojibake); the string is left untouched — confirm the intended glyph.
echo "πŸ“¦ Downloading NLTK resources..."
# Package ids handed to the NLTK downloader, one per line so additions and
# removals show up as single-line diffs.
nltk_packages=(
  punkt
  punkt_tab
  averaged_perceptron_tagger
  averaged_perceptron_tagger_eng
  stopwords
  wordnet
  omw-1.4
)
# ${NLTK_DATA:?...} stops the script with a clear message if the target
# directory variable is unset or empty instead of passing "-d" a blank value.
python -m nltk.downloader -d "${NLTK_DATA:?NLTK_DATA must be set}" "${nltk_packages[@]}"
# -------------------------------------------------------
# 4️⃣ Verify NLTK installs and paths
# -------------------------------------------------------
# Prints the NLTK search path and reports, per package, whether its data can
# be located. nltk.data.find() takes a path relative to a data directory
# (category folder + name), so each package id is mapped to that path; a bare
# id such as "punkt" is never found.
# The heredoc delimiter is quoted, so the shell passes the Python code through
# without expanding anything inside it.
python - <<'PYCODE'
import nltk

# Package id -> resource path below an nltk_data directory.
RESOURCES = {
    "punkt": "tokenizers/punkt",
    "punkt_tab": "tokenizers/punkt_tab",
    "averaged_perceptron_tagger_eng": "taggers/averaged_perceptron_tagger_eng",
    "stopwords": "corpora/stopwords",
    "wordnet": "corpora/wordnet",
}

# NOTE(review): the symbols in the messages below look mis-encoded (mojibake);
# the strings are left untouched — confirm the intended glyphs.
print(f"NLTK data path β†’ {nltk.data.path}")
for pkg, resource in RESOURCES.items():
    try:
        nltk.data.find(resource)
        print(f"βœ… Verified NLTK resource: {pkg}")
    except LookupError:
        # Report only: a missing resource is logged but does not fail the build.
        print(f"⚠️ Missing NLTK resource: {pkg}")
PYCODE
# -------------------------------------------------------
# 5️⃣ Clean caches (stay <50GB)
# -------------------------------------------------------
printf '%s\n' "🧹 Cleaning Hugging Face + Torch caches..."
# Directories that are emptied: their non-hidden entries are removed while the
# directory itself is kept.
for emptied_dir in \
  /root/.cache \
  /home/user/.cache \
  /home/user/app/hf_cache \
  /home/user/app/logs; do
  # Best effort: "|| true" keeps a failed removal from stopping the script
  # under "set -e".
  rm -rf "${emptied_dir}"/* || true
done
# This one is removed outright rather than emptied.
rm -rf /usr/local/share/nltk_data/taggers/__pycache__ || true
# -------------------------------------------------------
# 6️⃣ Ensure writable temporary cache for runtime
# -------------------------------------------------------
printf '%s\n' "πŸ“¦ Preparing /tmp/hf_cache..."
runtime_cache=/tmp/hf_cache
# Create the directory (no error if it already exists), then make it and
# everything below it readable, writable and executable for every user.
# Kept as two separate commands so a failure of either one is fatal under
# "set -e".
mkdir -p "${runtime_cache}"
chmod -R 777 "${runtime_cache}"
# -------------------------------------------------------
# Done
# -------------------------------------------------------
# NOTE(review): this summary is printed unconditionally; it does not consult
# the result of the NLTK verification step.
printf '%s\n' "βœ… PostBuild completed successfully β€” NLTK preloaded (punkt_tab OK), cache ready at /tmp/hf_cache."