File size: 3,267 Bytes
9bf4bd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Copyright (c) OpenMMLab. All rights reserved.
import hashlib
import os.path as osp
import sys
import warnings
from glob import glob
from typing import List

from mmengine import mkdir_or_exist


def list_to_file(filename, lines):
    """Dump a sequence of items into a text file, one item per line.

    Every item is formatted with an f-string and terminated by ``"\\n"``.
    The file is opened in write mode with UTF-8 encoding, so an existing
    file is overwritten.

    Args:
        filename (str): The output filename. It will be created/overwritten.
        lines (list(str)): Data to be written.
    """
    # Hand the parent directory to mkdir_or_exist before opening the file
    # (presumably it creates the directory when missing).
    target_dir = osp.dirname(filename)
    mkdir_or_exist(target_dir)
    with open(filename, 'w', encoding='utf-8') as out_file:
        out_file.writelines(f'{item}\n' for item in lines)


def list_from_file(filename, encoding='utf-8'):
    """Read a text file and return its lines as a list of strings.

    Any trailing "\\r" and "\\n" characters are stripped from each line;
    other whitespace is kept as is.

    Note:
        This will be replaced by mmcv's version after it supports encoding.

    Args:
        filename (str): Filename.
        encoding (str): Encoding used to open the file. Default utf-8.

    Returns:
        list[str]: A list of strings.
    """
    with open(filename, encoding=encoding) as text_file:
        return [raw.rstrip('\n\r') for raw in text_file]


def is_archive(file_path: str) -> bool:
    """Check whether the file is a supported archive format.

    The decision is based only on the file name: it must end with one of
    the supported extensions (``.zip``, ``.tar`` or ``.tar.gz``), leading
    dot included. Names that merely end with the letters "zip" or "tar",
    such as ``gzip`` or ``my_unzip``, are therefore not treated as
    archives. The match is case-sensitive.

    Args:
        file_path (str): Path to the file.

    Returns:
        bool: Whether the file is an archive.
    """
    # The leading dot anchors the match to a real extension; str.endswith
    # accepts a tuple, so all suffixes are tested in a single call.
    return file_path.endswith(('.zip', '.tar', '.tar.gz'))


def check_integrity(file_path: str,
                    md5: str,
                    chunk_size: int = 1024 * 1024) -> bool:
    """Verify that a file exists and that its MD5 equals the expected one.

    When ``md5`` is None the check is skipped: a warning is issued and
    True is returned without looking at the file.

    Args:
        file_path (str): Path to the file.
        md5 (str): MD5 to be matched.
        chunk_size (int, optional): Chunk size. Defaults to 1024*1024.

    Returns:
        bool: Whether the md5 is matched.
    """
    if md5 is not None:
        # A missing file can never match; short-circuit before hashing.
        return osp.exists(file_path) and get_md5(
            file_path=file_path, chunk_size=chunk_size) == md5

    warnings.warn('MD5 is None, skip the integrity check.')
    return True


def get_md5(file_path: str, chunk_size: int = 1024 * 1024) -> str:
    """Compute the MD5 hex digest of a file, reading it chunk by chunk.

    Args:
        file_path (str): Path to the file.
        chunk_size (int, optional): Chunk size. Defaults to 1024*1024.

    Returns:
        str: MD5 of the file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not osp.exists(file_path):
        raise FileNotFoundError(f'{file_path} does not exist.')

    # ``usedforsecurity`` is only passed on Python 3.9 and newer.
    extra_kwargs = {}
    if sys.version_info >= (3, 9):
        extra_kwargs['usedforsecurity'] = False
    digest = hashlib.md5(**extra_kwargs)

    with open(file_path, 'rb') as stream:
        while True:
            block = stream.read(chunk_size)
            # An empty read marks the end of the file.
            if block == b'':
                break
            digest.update(block)

    return digest.hexdigest()


def list_files(path: str, suffixes: List) -> List:
    """Collect the files directly under a directory that match the suffixes.

    For each suffix the glob pattern ``<path>/*<suffix>`` is evaluated, and
    the matches are concatenated in the order the suffixes are given. A file
    matching more than one suffix is listed once per matching suffix.

    Args:
        path (str): Path to the directory.
        suffixes (list[str]): Suffixes to be retrieved.

    Returns:
        List: List of the files.
    """
    return [
        matched for ending in suffixes
        for matched in glob(osp.join(path, '*' + ending))
    ]