File size: 2,226 Bytes
cb34746
 
 
 
 
 
301572e
 
cb34746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301572e
 
 
 
cb34746
 
 
301572e
cb34746
 
 
301572e
 
 
cb34746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import binascii
import hashlib
import os.path
import sys
from typing import Dict, Optional

import crcmod
import requests

import pure_blake3


def get_hash_string(hash_bytes: bytes) -> str:
    """Return ``hash_bytes`` rendered as a hexadecimal text string."""
    hex_encoded = binascii.b2a_hex(hash_bytes)
    return str(hex_encoded, "utf-8")


def compute_AutoV1Hash(file_stream) -> Optional[str]:
    """Compute the short "AutoV1" hash of an open binary stream.

    The hash is the first 8 hex characters of the SHA-256 digest of the
    0x10000 bytes that start at offset 0x100000 of the stream.

    Args:
        file_stream: Seekable binary stream. Its position is moved by this
            call and is not restored.

    Returns:
        The 8-character hex string, or ``None`` when the stream is shorter
        than 0x200000 bytes.
    """
    sample_offset = 0x100000
    sample_size = 0x10000
    min_file_size = sample_offset * 2

    # Seeking to the end returns the absolute position, i.e. the stream length.
    if file_stream.seek(0, os.SEEK_END) < min_file_size:
        return None

    file_stream.seek(sample_offset)
    sample = file_stream.read(sample_size)
    return hashlib.sha256(sample).hexdigest()[:8]


def ComputeCRC32Hash(file_stream) -> str:
    """Return the hex-encoded CRC digest of an entire binary stream.

    The stream is rewound to its start and consumed in 4096-byte reads that
    are fed to crcmod's predefined 'crc-32c' algorithm.
    """
    hasher = crcmod.predefined.Crc('crc-32c')
    file_stream.seek(0)
    while True:
        block = file_stream.read(4096)
        if block == b"":
            break
        hasher.update(block)
    checksum_bytes = hasher.digest()
    return get_hash_string(checksum_bytes)


# HTTP request headers sent by generate_model_hashes when it downloads a remote
# file; the user-agent string identifies the client as a desktop Chrome browser.
# NOTE(review): presumably needed because some hosts reject the default
# python-requests user agent — confirm.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
}

def _download_to_cache(url: str) -> str:
    """Download ``url`` into /tmp (unless already cached) and return the path."""
    # NOTE: the cache entry is keyed by the URL's last path segment only, so
    # two different URLs with the same file name share one cache file.
    tmp_path = f'/tmp/clamd_{url.split("/")[-1].split("?")[0]}'
    if os.path.exists(tmp_path):
        return tmp_path

    # Stream into a sibling ".part" file and rename it once complete, so an
    # interrupted download is never mistaken for a finished cache entry.
    partial_path = f"{tmp_path}.part"
    with requests.get(url, headers=headers, stream=True, timeout=60) as resp:
        # Fail loudly instead of caching and hashing an HTTP error page.
        resp.raise_for_status()
        with open(partial_path, "wb") as out_file:
            for chunk in resp.iter_content(chunk_size=1 << 20):
                out_file.write(chunk)
    os.replace(partial_path, tmp_path)
    return tmp_path


def generate_model_hashes(file_path: str) -> Dict[str, Optional[str]]:
    """Compute the set of hashes for a local file or a remote URL.

    Args:
        file_path: Path of a local file, or an ``http://`` / ``https://`` URL.
            A URL is downloaded to a cache file under /tmp first; an existing
            cache file is reused without downloading again.

    Returns:
        A dict with the keys ``"SHA256"``, ``"AutoV1"``, ``"AutoV2"``,
        ``"BLAKE3"`` and ``"CRC32"``. ``"AutoV2"`` is the first 10 characters
        of the SHA-256 hex digest. ``"AutoV1"`` is ``None`` when
        ``compute_AutoV1Hash`` considers the file too small.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an HTTP error status.
        OSError: If the file cannot be opened or read.
    """
    # Match full URL schemes so a local file merely named "http..." is not
    # treated as a download.
    if file_path.startswith(("http://", "https://")):
        file_path = _download_to_cache(file_path)

    sha256 = hashlib.sha256()
    blake3_hasher = pure_blake3.Hasher()

    # A single handle serves every hash; the helper functions seek to the
    # positions they need, and the handle is closed when the block exits.
    with open(file_path, "rb") as file_stream:
        for chunk in iter(lambda: file_stream.read(4096), b""):
            sha256.update(chunk)
            blake3_hasher.update(chunk)
        auto_v1_hash = compute_AutoV1Hash(file_stream)
        crc32_hash = ComputeCRC32Hash(file_stream)

    sha256_hash = sha256.hexdigest()

    return {
        "SHA256": sha256_hash,
        "AutoV1": auto_v1_hash,
        "AutoV2": sha256_hash[:10],
        "BLAKE3": blake3_hasher.finalize().hex(),
        "CRC32": crc32_hash,
    }


if __name__ == "__main__":
    # Hash the path or URL given as the first command-line argument; with no
    # argument, fall back to the ".gitignore" smoke-test target.
    target = sys.argv[1] if len(sys.argv) > 1 else ".gitignore"
    print(generate_model_hashes(target))