model-scan-2 / scan_hash.py
pengdaqian
fix now
301572e
raw
history blame contribute delete
No virus
2.23 kB
import hashlib
import os.path
import sys
from typing import Dict
import binascii
import crcmod
import requests
import pure_blake3
def get_hash_string(hash_bytes: bytes) -> str:
    """Return the hexadecimal text representation of *hash_bytes*.

    Each input byte becomes two lowercase hex digits; an empty input
    yields an empty string.
    """
    hex_digits = binascii.hexlify(hash_bytes)
    return str(hex_digits, "utf-8")
def compute_AutoV1Hash(file_stream) -> str:
    """Return the 8-character "AutoV1" short hash of a binary stream.

    The hash is the first 8 hex characters of the SHA-256 digest of the
    0x10000 bytes that start at offset 0x100000 of the stream.

    Args:
        file_stream: A seekable binary stream. Its position is moved by
            this function and it is not closed here.

    Returns:
        The 8-character hex string, or ``None`` when the stream is
        shorter than 0x200000 bytes (despite the ``str`` annotation).
    """
    sample_offset = 0x100000
    sample_length = 0x10000
    required_size = sample_offset * 2

    # Seeking relative to the end (whence=2) reports the stream length.
    stream_size = file_stream.seek(0, 2)
    if stream_size < required_size:
        return None

    file_stream.seek(sample_offset)
    sample = file_stream.read(sample_length)
    digest = hashlib.sha256(sample).digest()
    return get_hash_string(digest)[:8]
def ComputeCRC32Hash(file_stream) -> str:
    """Return the hex checksum of an entire binary stream.

    The stream is rewound to offset 0 and consumed in 4096-byte reads
    until an empty read signals end of data. It is not closed here.

    NOTE(review): the function name says CRC32, but the predefined
    algorithm requested from crcmod is 'crc-32c' -- confirm that this
    variant is the one consumers of the "CRC32" value expect.
    """
    checksum = crcmod.predefined.Crc('crc-32c')
    file_stream.seek(0)
    while True:
        block = file_stream.read(4096)
        if block == b"":
            break
        checksum.update(block)
    raw_digest = checksum.digest()
    return get_hash_string(raw_digest)
# Request headers sent with model downloads; the User-Agent value is a
# desktop Chrome browser string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
}
def _download_to_cache(url: str, dest_path: str) -> None:
    """Stream *url* to *dest_path*, publishing the file only when complete.

    The body is written to a sibling ``.part`` file and atomically renamed
    into place, so an interrupted download never leaves a truncated file
    that a later call would mistake for a valid cache entry.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Connection failure or timeout.
        OSError: The cache file could not be written.
    """
    # Seconds to wait for the connection / for each read before giving up.
    timeout_seconds = 30
    # Per-process suffix so concurrent downloads do not clobber each other.
    part_path = f"{dest_path}.{os.getpid()}.part"
    try:
        with requests.get(
            url, headers=headers, stream=True, timeout=timeout_seconds
        ) as response:
            # Refuse to cache (and later hash) an HTTP error page.
            response.raise_for_status()
            with open(part_path, "wb") as part_file:
                # Stream in 1 MiB pieces instead of buffering the whole
                # body in memory.
                for piece in response.iter_content(chunk_size=1024 * 1024):
                    part_file.write(piece)
        os.replace(part_path, dest_path)
    finally:
        # Only present if the download failed before the rename.
        if os.path.exists(part_path):
            os.remove(part_path)


def generate_model_hashes(file_path: str) -> Dict[str, str]:
    """Compute the set of model hashes for a local file or an HTTP(S) URL.

    When *file_path* is an ``http://`` or ``https://`` URL the file is
    downloaded once to ``/tmp/clamd_<name>`` (``<name>`` being the last
    URL path segment without its query string) and reused on later calls.

    NOTE: the cache key is only that last path segment, so two different
    URLs ending in the same file name share one cache entry.

    Args:
        file_path: Path of a local file, or an HTTP(S) URL to download.

    Returns:
        A dict with the keys ``"SHA256"``, ``"AutoV1"``, ``"AutoV2"``,
        ``"BLAKE3"`` and ``"CRC32"``. ``"AutoV2"`` is the first 10 hex
        characters of the SHA-256; ``"AutoV1"`` is ``None`` when
        ``compute_AutoV1Hash`` rejects the file as too small.

    Raises:
        requests.RequestException: The download failed.
        OSError: The file could not be read or the cache not written.
    """
    if file_path.startswith(("http://", "https://")):
        tmp_path = f'/tmp/clamd_{file_path.split("/")[-1].split("?")[0]}'
        if not os.path.exists(tmp_path):
            _download_to_cache(file_path, tmp_path)
        file_path = tmp_path

    read_size = 1024 * 1024
    sha256 = hashlib.sha256()
    blake3_hasher = pure_blake3.Hasher()

    # One handle serves all passes: both helpers seek explicitly before
    # reading, and the ``with`` guarantees the handle is closed.
    with open(file_path, "rb") as file_stream:
        for chunk in iter(lambda: file_stream.read(read_size), b""):
            sha256.update(chunk)
            blake3_hasher.update(chunk)
        auto_v1_hash = compute_AutoV1Hash(file_stream)
        crc32_hash = ComputeCRC32Hash(file_stream)

    sha256_hash = sha256.hexdigest()
    return {
        "SHA256": sha256_hash,
        "AutoV1": auto_v1_hash,
        "AutoV2": sha256_hash[:10],
        "BLAKE3": blake3_hasher.finalize().hex(),
        "CRC32": crc32_hash,
    }
if __name__ == "__main__":
    # Manual smoke run: hash the ".gitignore" file in the current
    # working directory and print the resulting dict.
    sample_hashes = generate_model_hashes(".gitignore")
    print(sample_hashes)