|
#!/bin/bash
#
# Service entrypoint: starts the uvicorn app and, when credentials are
# available, wires up backup/restore against a Hugging Face dataset repo.
#
# Environment:
#   HF_TOKEN       - Hugging Face access token (required for backups)
#   DATASET_ID     - dataset repo id used to store backups (required for backups)
#   SYNC_INTERVAL  - seconds between backups (optional, defaults to 7200)

# Without both credentials the backup machinery cannot work, so run the
# service alone. `exec` replaces this shell: nothing after it can run, and if
# the exec itself fails a non-interactive bash exits with a non-zero status.
if [[ -z "${HF_TOKEN:-}" || -z "${DATASET_ID:-}" ]]; then
  echo "缺少环境变量HF_TOKEN或DATASET_ID,启动服务但不启用备份功能"
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
fi

# Expose the token under the variable name exported for the Python helpers.
export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
|
|
|
#######################################
# Restore application data from the newest backup archive in the dataset repo.
#
# Lists the repo files, picks the lexicographically last
# `backup_*.tar.gz`, downloads it into /tmp and unpacks it into /app/app.
# The step is best-effort: a missing backup or a failed extraction is
# reported on stdout and does not change the exit status.
#
# Globals:   DATASET_ID (read) - dataset repo id
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   exit status of the Python helper (non-zero only when it raises,
#            e.g. on an API error)
#######################################
restore_latest() {
  echo "正在检查备份..."

  # Quoted heredoc: the shell expands nothing inside the Python source. The
  # repo id is handed over through the environment instead of being spliced
  # into the code, so unusual characters cannot break or alter the script.
  DATASET_ID="${DATASET_ID}" python3 - <<'PY'
import os
import subprocess

from huggingface_hub import HfApi

repo_id = os.environ['DATASET_ID']
restore_dir = '/app/app'
download_dir = '/tmp'

api = HfApi()
files = api.list_repo_files(repo_id, repo_type='dataset')
# Names embed a YYYYmmdd_HHMMSS timestamp, so the last one after sorting is
# the newest.
backup_files = sorted(
    f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')
)

if not backup_files:
    print('未发现任何备份文件,跳过恢复步骤')
    raise SystemExit(0)

latest = backup_files[-1]
print(f'找到备份文件: {latest}, 开始下载...')
backup_path = api.hf_hub_download(
    repo_id, latest, repo_type='dataset', local_dir=download_dir
)

if not backup_path or not os.path.exists(backup_path):
    print('下载备份文件失败!')
    raise SystemExit(0)

print(f'备份文件已下载: {backup_path}, 大小: {os.path.getsize(backup_path)} bytes')
try:
    # Argument list, no shell: the path is never re-parsed, and tar's real
    # exit status is kept so a failed extraction is not reported as success.
    # Ownership/permission/mtime restoration is disabled to avoid nesting
    # and permission problems in the target directory.
    completed = subprocess.run(
        [
            'tar',
            '--no-same-owner',
            '--no-same-permissions',
            '--touch',
            '--warning=no-timestamp',
            '-xzf', backup_path,
            '-C', restore_dir,
        ],
        check=False,
    )
    if completed.returncode == 0:
        print('成功恢复数据!')
    else:
        print(
            '解压时出现次要警告或错误,请检查数据完整性,'
            f'tar返回码: {completed.returncode}'
        )
finally:
    # Always drop the downloaded archive, even if extraction blew up.
    os.remove(backup_path)
PY
}
|
|
|
#######################################
# Round-trip permission check against the dataset repo: upload a small test
# file, download it again, compare the bytes, then delete it from the repo.
#
# The upload source and the download target live in separate directories of
# a private temp dir. Downloading into the directory that already holds the
# source file would resolve to the same path, making the comparison
# meaningless and the cleanup fail on a double delete.
#
# Globals:   DATASET_ID (read) - dataset repo id
# Arguments: none
# Outputs:   progress messages on stdout, setup errors on stderr
# Returns:   0 when upload, download and comparison succeed; non-zero
#            otherwise (status of the Python helper, or 1 on setup failure)
#######################################
backup_upload_download_test() {
  echo "正在执行备份上传-下载权限完整性测试..."

  local test_name work_dir local_path download_dir status
  test_name="backup_test_$(date +%Y%m%d_%H%M%S).txt"

  work_dir=$(mktemp -d) || {
    echo "无法创建临时目录,跳过备份权限测试" >&2
    return 1
  }
  local_path="${work_dir}/upload/${test_name}"
  download_dir="${work_dir}/download"
  if ! mkdir -- "${work_dir}/upload" "${download_dir}"; then
    rm -rf -- "${work_dir:?}"
    return 1
  fi
  printf '%s\n' "备份测试内容 $(date)" > "${local_path}"

  # Values are passed through the environment and the script through a
  # quoted heredoc, so nothing from the shell is spliced into Python source.
  status=0
  DATASET_ID="${DATASET_ID}" \
  BACKUP_TEST_NAME="${test_name}" \
  BACKUP_TEST_LOCAL_PATH="${local_path}" \
  BACKUP_TEST_DOWNLOAD_DIR="${download_dir}" \
    python3 - <<'PY' || status=$?
import os
import sys

from huggingface_hub import HfApi

repo_id = os.environ['DATASET_ID']
test_file_name = os.environ['BACKUP_TEST_NAME']
local_test_path = os.environ['BACKUP_TEST_LOCAL_PATH']
download_dir = os.environ['BACKUP_TEST_DOWNLOAD_DIR']

api = HfApi()

# Upload the test file.
try:
    api.upload_file(
        path_or_fileobj=local_test_path,
        path_in_repo=test_file_name,
        repo_id=repo_id,
        repo_type='dataset',
    )
    print('✅ 测试文件上传成功')
except Exception as e:
    print(f'❌ 测试文件上传失败: {e}')
    sys.exit(1)

# Download it into a different directory and compare byte-for-byte.
ok = False
try:
    downloaded_path = api.hf_hub_download(
        repo_id=repo_id,
        filename=test_file_name,
        repo_type='dataset',
        local_dir=download_dir,
    )
    with open(local_test_path, 'rb') as f:
        expected = f.read()
    with open(downloaded_path, 'rb') as f:
        actual = f.read()
    if actual == expected:
        print('✅ 测试文件下载成功且内容一致')
        ok = True
    else:
        print('❌ 测试文件内容不一致')
except Exception as e:
    print(f'❌ 测试文件下载失败: {e}')
finally:
    # The file was uploaded, so remove it from the repo whatever the outcome
    # of the download check; a failed delete is only a warning.
    try:
        api.delete_file(
            path_in_repo=test_file_name,
            repo_id=repo_id,
            repo_type='dataset',
        )
        print('✅ 测试文件已成功删除')
    except Exception as e:
        print(f'⚠️ 测试文件删除失败: {e}')

sys.exit(0 if ok else 1)
PY

  # Local temp files are removed here so cleanup happens on every path.
  rm -rf -- "${work_dir:?}"
  return "${status}"
}
|
|
|
|
|
#######################################
# Periodic backup loop: every SYNC_INTERVAL seconds archive /app/app, upload
# the archive to the dataset repo and prune old archives, keeping the 50
# newest. Never returns; intended to be started in the background.
#
# Globals:   SYNC_INTERVAL (read, default 7200) - seconds between backups
#            DATASET_ID (read)                  - dataset repo id
# Arguments: none
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   does not return
#######################################
sync_data() {
  local -r data_dir="/app/app"
  local interval="${SYNC_INTERVAL:-7200}"
  local backup_file archive tar_status

  echo "启动后首次备份将在${interval}秒后执行"
  sleep "${interval}"

  while true; do
    echo "开始备份: $(date)"
    backup_file="backup_$(date +%Y%m%d_%H%M%S).tar.gz"
    archive="/tmp/${backup_file}"

    if [[ ! -d "${data_dir}" ]]; then
      # Never fall back to archiving whatever the current directory is.
      echo "备份目录${data_dir}不存在,跳过本次备份" >&2
    elif [[ -z "$(ls -A "${data_dir}" 2>/dev/null)" ]]; then
      echo "无数据需要备份"
    else
      # -C keeps member names relative ("./...") without changing cwd.
      tar_status=0
      tar -czf "${archive}" -C "${data_dir}" ./ || tar_status=$?

      # GNU tar exits 1 when files changed while being read; the archive is
      # still usable then. Anything above 1 is a fatal error.
      if (( tar_status > 1 )); then
        echo "打包失败(tar返回码: ${tar_status}),跳过本次上传" >&2
      else
        # Values go through the environment; the quoted heredoc keeps the
        # shell from expanding anything inside the Python source.
        DATASET_ID="${DATASET_ID}" \
        BACKUP_ARCHIVE_PATH="${archive}" \
        BACKUP_ARCHIVE_NAME="${backup_file}" \
          python3 - <<'PY' || echo "备份上传失败" >&2
import os

from huggingface_hub import HfApi

KEEP_LATEST = 50

repo_id = os.environ['DATASET_ID']
archive_path = os.environ['BACKUP_ARCHIVE_PATH']
archive_name = os.environ['BACKUP_ARCHIVE_NAME']

api = HfApi()
api.upload_file(
    path_or_fileobj=archive_path,
    path_in_repo=archive_name,
    repo_id=repo_id,
    repo_type='dataset',
)
print('备份上传成功')

# Only real archives count towards retention; other files sharing the
# "backup_" prefix (e.g. leftover permission-test files) are ignored.
backups = sorted(
    f for f in api.list_repo_files(repo_id, repo_type='dataset')
    if f.startswith('backup_') and f.endswith('.tar.gz')
)
for old_backup in backups[:-KEEP_LATEST]:
    api.delete_file(path_in_repo=old_backup, repo_id=repo_id, repo_type='dataset')
    print(f'删除旧备份: {old_backup}')
PY
      fi
      rm -f -- "${archive}"
    fi

    echo "下次备份将在${interval}秒后执行"
    sleep "${interval}"
  done
}
|
|
|
|
|
# Main flow: restore the latest backup, verify repo permissions, start the
# periodic backup loop in the background, then hand the process over to
# uvicorn. All output is mirrored to the backup log.
#
# stdout/stderr are redirected into a tee process substitution instead of
# piping a subshell into tee: with a pipeline, the background backup loop
# keeps the pipe's write end open, so the pipeline would never finish after
# uvicorn exits and the script's exit status would be tee's rather than
# uvicorn's. Here `exec` makes uvicorn this very process, so its exit status
# is the script's exit status.
# NOTE(review): the background loop is not reaped by this script once uvicorn
# exits; presumably the container runtime tears it down - confirm.

# tee only warns when it cannot open the log, but create the directory so the
# log is actually written.
mkdir -p /app/data || echo "无法创建日志目录/app/data" >&2

exec > >(tee -a /app/data/backup.log) 2>&1

restore_latest
backup_upload_download_test
sync_data &

exec uvicorn app.main:app --host 0.0.0.0 --port 7860
|
|
|
|