depclau / sync_data.sh
nbugs's picture
Update sync_data.sh
b9b6493 verified
#!/bin/bash
#
# Service start-up wrapper with optional Hugging Face dataset backups.
#
# When both HF_TOKEN and DATASET_ID are set, the rest of this script restores
# the newest backup from the dataset, runs an upload/download self-test,
# starts a periodic backup loop in the background and then launches uvicorn.
# When either variable is missing, only uvicorn is started.
#
# Environment:
#   HF_TOKEN       access token used for the dataset repository
#   DATASET_ID     id of the dataset repository that stores the backups
#   SYNC_INTERVAL  seconds between backups (optional, default 7200)

# Without credentials there is nothing to restore or back up: announce it and
# hand the process over to the web server.
if [[ -z "${HF_TOKEN}" || -z "${DATASET_ID}" ]]; then
  echo "缺少环境变量HF_TOKEN或DATASET_ID,启动服务但不启用备份功能"
  # exec replaces this shell with uvicorn; if exec itself fails, a
  # non-interactive bash exits with the failure status, so no explicit exit
  # is needed (a trailing "exit 0" could never run).
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
fi

# Exported so that every python3 child process started below inherits the
# token (huggingface_hub is expected to read it from this variable).
export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
#######################################
# Restore the newest backup archive from the Hugging Face dataset.
#
# Lists the files of the dataset, keeps the names matching backup_*.tar.gz,
# takes the last one in sorted order, downloads it into a private temporary
# directory and unpacks it with tar.
#
# Globals:   DATASET_ID (read)
# Arguments: $1 - directory to extract into (optional, default /app/app)
# Outputs:   progress messages on stdout; tar diagnostics on stderr
# Returns:   exit status of the python3 helper. A failed extraction is
#            reported in the output but still yields 0; an uncaught Python
#            exception (e.g. from the hub API) yields non-zero.
#######################################
restore_latest() {
  local target_dir="${1:-/app/app}"

  echo "正在检查备份..."
  # The heredoc delimiter is quoted, so the shell expands nothing inside the
  # Python source; the dataset id and target directory travel as argv instead
  # of being spliced into the code.
  python3 - "${DATASET_ID}" "${target_dir}" <<'PY'
import os
import subprocess
import sys
import tempfile

from huggingface_hub import HfApi

repo_id, target_dir = sys.argv[1], sys.argv[2]

api = HfApi()
files = api.list_repo_files(repo_id, repo_type='dataset')
backup_files = sorted(
    f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')
)
if not backup_files:
    print('未发现任何备份文件,跳过恢复步骤')
    sys.exit(0)

# Names embed a YYYYmmdd_HHMMSS timestamp, so the last one in sorted order is
# the most recent backup.
latest = backup_files[-1]
print(f'找到备份文件: {latest}, 开始下载...')

# A private scratch directory avoids predictable paths in /tmp and is removed
# (together with anything the download left in it) when the block exits.
with tempfile.TemporaryDirectory(prefix='restore_') as download_dir:
    backup_path = api.hf_hub_download(
        repo_id=repo_id,
        filename=latest,
        repo_type='dataset',
        local_dir=download_dir,
    )
    if not os.path.exists(backup_path):
        print('下载备份文件失败!')
        sys.exit(0)

    # flush so this line reaches the log before tar writes to stderr
    print(
        f'备份文件已下载: {backup_path}, 大小: {os.path.getsize(backup_path)} bytes',
        flush=True,
    )
    # Argument list instead of a shell string: no quoting problems, and the
    # real tar exit code is visible (no "|| true" that forces it to 0).
    exit_code = subprocess.run([
        'tar', '--no-same-owner', '--no-same-permissions', '--touch',
        '--warning=no-timestamp', '-xzf', backup_path, '-C', target_dir,
    ]).returncode
    if exit_code == 0:
        print('成功恢复数据!')
    else:
        print(f'解压时出现次要警告或错误,请检查数据完整性,tar返回码: {exit_code}')
PY
}
#######################################
# Round-trip self-test of the backup repository permissions.
#
# Writes a small text file, uploads it to the dataset, downloads it again,
# compares the two copies and finally deletes the file from the dataset.
# All local files live in a private temporary directory that is removed
# before returning, whatever the outcome.
#
# Globals:   DATASET_ID (read)
# Arguments: none
# Outputs:   one status line per step on stdout
# Returns:   0 when upload and download succeeded with identical content,
#            non-zero otherwise (a failed remote delete is only a warning)
#######################################
backup_upload_download_test() {
  echo "正在执行备份上传-下载权限完整性测试..."

  local test_file_name work_dir local_test_path status
  test_file_name="backup_test_$(date +%Y%m%d_%H%M%S).txt"
  work_dir=$(mktemp -d) || return 1
  local_test_path="${work_dir}/${test_file_name}"
  printf '%s\n' "备份测试内容 $(date)" > "${local_test_path}"

  # Quoted heredoc: nothing is expanded inside the Python source, all values
  # are passed as argv. The download goes into its own sub-directory so it
  # cannot land on top of the file that was just uploaded.
  python3 - "${DATASET_ID}" "${test_file_name}" "${local_test_path}" \
    "${work_dir}/download" <<'PY'
import os
import sys

from huggingface_hub import HfApi

repo_id, test_file_name, local_test_path, download_dir = sys.argv[1:5]
api = HfApi()

# Upload the test file; without a successful upload there is nothing else
# to verify or clean up remotely.
try:
    api.upload_file(
        path_or_fileobj=local_test_path,
        path_in_repo=test_file_name,
        repo_id=repo_id,
        repo_type='dataset',
    )
    print('✅ 测试文件上传成功')
except Exception as e:
    print(f'❌ 测试文件上传失败: {e}')
    sys.exit(1)

# Download it again and compare with what was uploaded.
ok = True
try:
    os.makedirs(download_dir, exist_ok=True)
    downloaded_path = api.hf_hub_download(
        repo_id=repo_id,
        filename=test_file_name,
        repo_type='dataset',
        local_dir=download_dir,
    )
    with open(local_test_path, 'rb') as f:
        expected = f.read().strip()
    with open(downloaded_path, 'rb') as f:
        content = f.read().strip()
    if content == expected:
        print('✅ 测试文件下载成功且内容一致')
    else:
        print('❌ 测试文件内容不一致')
        ok = False
except Exception as e:
    print(f'❌ 测试文件下载失败: {e}')
    ok = False

# Always remove the remote test file once it has been uploaded, even when the
# download step failed, so no backup_test_* files pile up in the dataset.
try:
    api.delete_file(
        path_in_repo=test_file_name,
        repo_id=repo_id,
        repo_type='dataset',
    )
    print('✅ 测试文件已成功删除')
except Exception as e:
    print(f'⚠️ 测试文件删除失败: {e}')

sys.exit(0 if ok else 1)
PY
  status=$?

  # Local cleanup happens here so it also covers every early exit above.
  rm -rf -- "${work_dir}"
  return "${status}"
}
#######################################
# Run one backup cycle: archive the data directory, upload the archive to
# the dataset and prune old backups so that at most 50 remain.
#
# Globals:   DATASET_ID (read)
# Arguments: $1 - directory to back up
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 on success or when there is nothing to back up, non-zero when
#            archiving or uploading failed
#######################################
_sync_data_backup_once() {
  local data_dir="$1"
  local backup_file scratch_dir archive_path
  local tar_status=0 upload_status=0

  # ls -A prints nothing for an empty (or missing/unreadable) directory.
  if [[ -z "$(ls -A -- "${data_dir}" 2>/dev/null)" ]]; then
    echo "无数据需要备份"
    return 0
  fi

  backup_file="backup_$(date +%Y%m%d_%H%M%S).tar.gz"
  if ! scratch_dir=$(mktemp -d); then
    echo "无法创建临时目录,跳过本次备份" >&2
    return 1
  fi
  archive_path="${scratch_dir}/${backup_file}"

  # -C instead of cd: a missing directory makes tar fail instead of silently
  # archiving whatever the current directory happens to be.
  tar -czf "${archive_path}" -C "${data_dir}" ./ || tar_status=$?
  # GNU tar returns 1 when files changed while being read (expected for a
  # live data directory; the archive is still usable) and 2 on fatal errors.
  if (( tar_status > 1 )); then
    echo "打包失败(tar返回码: ${tar_status}),跳过本次上传" >&2
    rm -rf -- "${scratch_dir}"
    return 1
  fi

  # Quoted heredoc: values are passed as argv, nothing is spliced into the
  # Python source.
  python3 - "${archive_path}" "${backup_file}" "${DATASET_ID}" <<'PY'
import sys

from huggingface_hub import HfApi

archive_path, backup_name, repo_id = sys.argv[1:4]
api = HfApi()
api.upload_file(
    path_or_fileobj=archive_path,
    path_in_repo=backup_name,
    repo_id=repo_id,
    repo_type='dataset',
)
print('备份上传成功')

# Only real archives take part in the rotation; other files that merely
# start with "backup_" (such as leftover backup_test_*.txt files) would sort
# after every timestamped archive and use up the slots that are kept.
backup_files = sorted(
    f for f in api.list_repo_files(repo_id, repo_type='dataset')
    if f.startswith('backup_') and f.endswith('.tar.gz')
)
for old_backup in backup_files[:-50]:
    api.delete_file(path_in_repo=old_backup, repo_id=repo_id, repo_type='dataset')
    print(f'删除旧备份: {old_backup}')
PY
  upload_status=$?

  rm -rf -- "${scratch_dir}"
  return "${upload_status}"
}

#######################################
# Periodic backup loop; never returns. Waits one interval before the first
# backup, then alternates between a backup cycle and a sleep.
#
# Globals:   SYNC_INTERVAL (read; seconds between backups, default 7200),
#            DATASET_ID (read by the backup cycle)
# Arguments: $1 - directory to back up (optional, default /app/app)
# Outputs:   progress messages on stdout
#######################################
sync_data() {
  local data_dir="${1:-/app/app}"
  local interval="${SYNC_INTERVAL:-7200}"

  echo "启动后首次备份将在${interval}秒后执行"
  sleep "${interval}"
  while true; do
    echo "开始备份: $(date)"
    # A failed cycle is reported by the helper; the loop simply tries again
    # after the next interval.
    _sync_data_backup_once "${data_dir}"
    echo "下次备份将在${interval}秒后执行"
    sleep "${interval}"
  done
}
# Start-up sequence. Everything runs inside one subshell so that stdout and
# stderr of every step, including the server, are mirrored into the log file
# through tee.

# tee cannot create missing parent directories; without this the log would be
# lost when /app/data does not exist yet.
mkdir -p /app/data

# Report a non-zero status of the subshell (i.e. of uvicorn) as the status of
# the pipeline instead of always reporting tee's status.
set -o pipefail

# NOTE(review): sync_data is started in the background inside the subshell,
# so it shares the pipe to tee; tee (and therefore this script) may keep
# running after uvicorn exits until the background job ends - confirm whether
# that matters for the deployment.
(
  restore_latest                # restore the newest backup, if any
  backup_upload_download_test   # first-start upload/download self-test
  sync_data &                   # periodic backups in the background
  exec uvicorn app.main:app --host 0.0.0.0 --port 7860
) 2>&1 | tee -a /app/data/backup.log