# THIS SOFTWARE IS NOT OPEN SOURCED!!! REDISTRIBUTION PROHIBITED! SEE LICENSE FOR DETAILS.
import os
import shutil
import tempfile
from typing import Optional

from huggingface_hub import CommitOperationAdd, HfApi, repo_exists, snapshot_download
from huggingface_hub.file_download import repo_folder_name
from llama_cpp import llama_cpp
from slugify import slugify

from constants import *  # expected to provide `username` and `types_to_quantize`
from cscript import main

# Quantization type names mapped to llama.cpp's llama_ftype enum values.
# The gaps in the numbering (4-6) correspond to schemes llama.cpp has removed.
types = {
    'F32': 0,
    'F16': 1,
    'Q4_0': 2,
    'Q4_1': 3,
    'Q8_0': 7,
    'Q5_0': 8,
    'Q5_1': 9,
    'Q2_K': 10,
    'Q3_K_S': 11,
    'Q3_K_M': 12,
    'Q3_K_L': 13,
    'Q4_K_S': 14,
    'Q4_K_M': 15,
    'Q5_K_S': 16,
    'Q5_K_M': 17,
    'Q6_K': 18,
}


def calcftype(qtype: str) -> int:
    """Return the llama_ftype enum value for a quantization type name."""
    return types[qtype.upper()]
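# e.g. calcftype("q4_k_m") -> 15, which matches llama.cpp's LLAMA_FTYPE_MOSTLY_Q4_K_M.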


def convert_it(model_id, token, folder):
    # Fill in the model-card template for the new GGUF repo.
    with open("README_TEMPLATE.md", "r") as f:
        README = f.read().replace("<<MODEL_ID>>", model_id)
    # Download the original model weights into the working folder.
    path = snapshot_download(repo_id=model_id, token=token, cache_dir=folder)
    # Convert the checkpoint to a single F16 GGUF file first; every quantized
    # variant below is derived from it.
    sf_name = "model-f16.gguf"
    sf_path = os.path.join(folder, sf_name)
    main(path, sf_path)
    operations = [
        CommitOperationAdd(path_in_repo=sf_name, path_or_fileobj=sf_path),
        CommitOperationAdd(path_in_repo="README.md", path_or_fileobj=README.encode()),
    ]
    print("Quantization Time!")
    for qtype in types_to_quantize:
        print(f"Quantizing {qtype}!")
        q_path = os.path.join(folder, f"model-{qtype.lower()}.gguf")
        # Positional params: nthread (0 = let llama.cpp pick), target ftype,
        # and a flag allowing requantization of already-quantized tensors.
        llama_cpp.llama_model_quantize(
            sf_path.encode(),
            q_path.encode(),
            llama_cpp.llama_model_quantize_params(0, calcftype(qtype), True),
        )
        print(f"Done Quantizing {qtype}!")
        operations.append(
            CommitOperationAdd(path_in_repo=f"model-{qtype.lower()}.gguf", path_or_fileobj=q_path)
        )
    return operations
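# For reference, `types_to_quantize` (imported from constants.py, which is not
# shown here) is assumed to be a list of keys from the `types` mapping above,
# e.g. something like:
# types_to_quantize = ["Q4_K_M", "Q5_K_M", "Q8_0"]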


def convert(
    api: "HfApi", model_id: str, revision: Optional[str] = None, force: bool = False
) -> str:
    repo_id = username + "/" + slugify(model_id.strip()) + "-GGUF"
    with tempfile.TemporaryDirectory() as d:
        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
        os.makedirs(folder)
        if repo_exists(repo_id, token=api.token):
            raise ValueError(f"{repo_id} already exists")
        try:
            ops = convert_it(model_id, api.token, folder)
            api.create_repo(repo_id)
            api.create_commit(
                repo_id=repo_id,
                revision=revision,
                operations=ops,
                commit_message="Add GGUF version",
                commit_description="Automated commit",
            )
        finally:
            # The TemporaryDirectory context manager removes `d` on exit as
            # well; this just frees the large model files as early as possible.
            shutil.rmtree(folder)
        return repo_id
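
# A minimal, untested usage sketch (the entry point, env var, and source repo
# below are illustrative assumptions, not part of this repo):
#
# if __name__ == "__main__":
#     api = HfApi(token=os.environ["HF_TOKEN"])
#     print(convert(api, "gpt2"))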