File size: 2,954 Bytes
150a1f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
683090e
 
 
 
 
 
150a1f4
683090e
 
 
 
 
 
 
 
 
150a1f4
 
 
 
 
 
 
683090e
 
 
150a1f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/bin/bash

# Environment check: backups need both HF_TOKEN and DATASET_ID.
if [[ -n "${HF_TOKEN}" && -n "${DATASET_ID}" ]]; then
    # Authenticate to Hugging Face through an environment variable
    # (avoids the problems of an interactive login).
    export HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}"
else
    # Either value is missing or empty: replace this shell with the app,
    # so nothing below this block runs.
    echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
    exec python main.py
fi

# Sync function.
#
# Endless backup loop: archives /app/data into /tmp/backup_<timestamp>.tar.gz,
# uploads the archive to the Hugging Face dataset repo named by DATASET_ID,
# prunes the oldest archives in that repo, removes the local archive, and then
# sleeps before the next round.
#
# Globals:   DATASET_ID (read)    - target dataset repo id
#            SYNC_INTERVAL (read) - seconds between rounds, default 7200
# Arguments: none
# Outputs:   progress messages on stdout, archive-creation errors on stderr
# Returns:   never returns (infinite loop); meant to be run in the background
sync_data() {
    local timestamp backup_file archive tar_status interval

    while true; do
        echo "Starting sync process at $(date)"

        # Build the temporary archive. `-C /app` replaces a bare `cd /app`,
        # so a missing directory is reported by tar instead of silently
        # archiving whatever directory we happen to be in.
        timestamp=$(date +%Y%m%d_%H%M%S)
        backup_file="backup_${timestamp}.tar.gz"
        archive="/tmp/${backup_file}"

        tar -czf "${archive}" -C /app data/
        tar_status=$?

        # GNU tar exits with 1 for warnings such as "file changed as we read
        # it" (likely here if files under data/ are written during the run);
        # the archive is still usable. Anything above 1 is a fatal error, and
        # uploading the result would publish a broken backup.
        if (( tar_status > 1 )); then
            echo "Backup failed: could not create ${archive} (tar exit status ${tar_status})" >&2
        else
            # Values are handed to Python through the environment rather than
            # spliced into the source, so quotes or other special characters
            # in them cannot break or alter the Python code.
            BACKUP_ARCHIVE="${archive}" \
            BACKUP_NAME="${backup_file}" \
            BACKUP_REPO_ID="${DATASET_ID:-}" \
            python3 -c '
import os

from huggingface_hub import HfApi


def manage_backups(api, repo_id, max_files=50):
    """Delete the oldest backup archives so that at most max_files remain."""
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    # Archive names embed a sortable timestamp, so sorted order is oldest first.
    backup_files = sorted(
        f for f in files if f.startswith("backup_") and f.endswith(".tar.gz")
    )

    # This runs after the upload, so the listing already contains the new
    # archive; only the files beyond max_files are removed.
    excess = max(len(backup_files) - max_files, 0)
    for file_to_delete in backup_files[:excess]:
        try:
            api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type="dataset")
            print(f"Deleted old backup: {file_to_delete}")
        except Exception as e:
            print(f"Error deleting {file_to_delete}: {str(e)}")


try:
    repo_id = os.environ["BACKUP_REPO_ID"]
    api = HfApi()
    api.upload_file(
        path_or_fileobj=os.environ["BACKUP_ARCHIVE"],
        path_in_repo=os.environ["BACKUP_NAME"],
        repo_id=repo_id,
        repo_type="dataset",
    )
    print("Backup uploaded successfully")

    manage_backups(api, repo_id)
except Exception as e:
    print(f"Backup failed: {str(e)}")
'
        fi

        # Clean up the temporary archive.
        rm -f -- "${archive}"

        # Wait for the configured interval before the next round.
        interval="${SYNC_INTERVAL:-7200}"
        echo "Next sync in ${interval} seconds..."
        sleep "${interval}"
    done
}

# Restore function.
#
# Downloads the newest backup_*.tar.gz from the Hugging Face dataset repo
# named by DATASET_ID into /tmp, extracts it into /app, and deletes the
# downloaded archive. "Newest" is the lexicographically greatest file name.
#
# Globals:   DATASET_ID (read) - source dataset repo id
# Arguments: none
# Outputs:   status messages on stdout
# Returns:   exit status of python3; Python-level errors are caught and
#            printed as "Restore failed: ..." rather than raised
restore_latest() {
    echo "Attempting to restore latest backup..."
    # The repo id travels through the environment instead of being spliced
    # into the Python source, so special characters in it cannot alter the code.
    BACKUP_REPO_ID="${DATASET_ID:-}" python3 -c '
try:
    import os
    import subprocess

    from huggingface_hub import HfApi

    repo_id = os.environ["BACKUP_REPO_ID"]
    api = HfApi()
    files = api.list_repo_files(repo_id, repo_type="dataset")
    backup_files = [f for f in files if f.startswith("backup_") and f.endswith(".tar.gz")]

    if backup_files:
        latest = max(backup_files)
        archive = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest,
            repo_type="dataset",
            local_dir="/tmp",
        )
        try:
            # Argument list, no shell: the remote file name is never parsed
            # as shell syntax, and check=True turns a failed extraction into
            # an exception instead of a false "Restored" message.
            subprocess.run(["tar", "-xzf", archive, "-C", "/app"], check=True)
        finally:
            os.remove(archive)
        print(f"Restored from {latest}")
    else:
        print("No backup found")
except Exception as e:
    print(f"Restore failed: {str(e)}")
'
}

# Main program: restore, start the background sync, run the app, and mirror
# all output (stdout and stderr) to the console and to /app/data/backup.log.
(
    # Try to restore the latest backup before the app starts.
    restore_latest

    # Start the sync loop. Job control is switched on only for the launch so
    # the background job gets its own process group; that lets us signal the
    # loop together with whatever child (sleep, tar, upload) it is running.
    set -m
    sync_data &
    sync_pid=$!
    set +m

    # Run the main app in the foreground (not via exec) so control comes back
    # here when it exits.
    python main.py
    app_status=$?

    # Stop the sync loop once the app is gone. If it kept running it would
    # hold the pipe to tee open and this script would never finish. Fall back
    # to a plain kill if the process-group signal is not possible.
    kill -- "-${sync_pid}" 2>/dev/null || kill "${sync_pid}" 2>/dev/null

    exit "${app_status}"
) 2>&1 | tee -a /app/data/backup.log

# Report the app's exit status instead of tee's.
exit "${PIPESTATUS[0]}"