File size: 3,238 Bytes
1fc2558
713b2b5
1fc2558
 
 
 
 
 
 
 
713b2b5
 
 
1fc2558
 
 
 
 
 
713b2b5
 
1fc2558
 
 
 
713b2b5
 
 
1fc2558
 
 
 
 
 
 
 
713b2b5
 
 
 
 
 
 
 
1fc2558
713b2b5
 
 
 
 
 
 
 
 
 
 
 
 
 
1fc2558
 
 
 
 
 
 
 
 
 
713b2b5
 
 
 
 
 
 
 
1fc2558
 
 
 
 
 
 
 
 
 
 
 
713b2b5
1fc2558
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
from pathlib import Path
from tempfile import TemporaryDirectory

import gradio as gr
from huggingface_hub import HfApi, Repository
from huggingface_hub.utils import RepositoryNotFoundError

from convert import convert


def run(
    token: str, model_id: str, precision: str, quantization: bool, destination: str
) -> str:
    """Convert a public BLOOM-like model with bloomz.cpp and open a PR on the Hub.

    Args:
        token: Hugging Face access token used to open the PR.
        model_id: Source repo id on the Hub (must be public).
        precision: "FP16" or "FP32", forwarded to `convert`.
        quantization: Whether to apply 4-bit quantization, forwarded to `convert`.
        destination: Target repo for the PR; falls back to `model_id` when empty.

    Returns:
        A markdown string describing success (with the PR URL) or the failure cause.
    """
    # Truthiness instead of `== ""` also rejects a None coming from a cleared field.
    if not token or not model_id:
        return """
        ### Invalid input 🐞
        
        Please fill a token and model_id.
        """
    if not destination:
        # Default: open the PR against the source repo itself.
        destination = model_id

    api = HfApi(token=token)
    try:
        # TODO: make a PR to bloomz.cpp to be able to pass a token
        # token=False forces an anonymous call, so private repos fail fast here.
        api.model_info(
            repo_id=model_id, token=False
        )  # only public repos are accessible
    except RepositoryNotFoundError:
        return f"""
        ### Error 😒😒😒
        
        Repository {model_id} not found. Only public models are convertible at the moment.
        """

    try:
        # TemporaryDirectory guarantees the (potentially huge) weights are cleaned up.
        with TemporaryDirectory() as cache_folder:
            model_path = convert(
                cache_folder=Path(cache_folder),
                model_id=model_id,
                precision=precision,
                quantization=quantization,
            )
            print("[model_path]", model_path)

            # Upload must happen inside the `with` block, before the tmp dir is wiped.
            commit_info = api.upload_file(
                repo_id=destination,
                path_or_fileobj=model_path,
                path_in_repo=model_path.name,
                create_pr=True,
                commit_message=f"Add {model_path.name} from bloomz.cpp converter.",
            )

            return f"""
            ### Success πŸ”₯
            Yay! This model was successfully converted and a PR was open using your token, here:
            
            # [{commit_info.pr_url}]({commit_info.pr_url})
            """
    except Exception as e:
        # Broad catch is deliberate: any conversion/upload failure is surfaced
        # to the user as markdown instead of crashing the Space.
        return f"""
        ### Error 😒😒😒
        
        {e}
        """


# User-facing markdown shown at the top of the Gradio interface; rendered
# verbatim by gr.Interface(description=...), so keep the wording/formatting as-is.
DESCRIPTION = """
The steps are the following:
- Paste your HF token. You can create one in your [settings page](https://huggingface.co/settings/tokens).
- Input a model id from the Hub. This model must be public.
- Choose which precision you want to use (default to FP16).
- (optional) Opt-in for 4-bit quantization.
- (optional) By default a PR to the initial repo will be created. You can choose a different destination repo if you want. The destination repo must exist.
- Click Submit:

That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the opened PR πŸ”₯
"""

# Form widgets; list order below must match the positional parameters of `run`.
token_box = gr.Text(max_lines=1, label="your hf_token")
model_box = gr.Text(max_lines=1, label="model_id (e.g.: bigscience/bloomz-7b1)")
precision_radio = gr.Radio(choices=["FP16", "FP32"], label="Precision", value="FP16")
quantization_box = gr.Checkbox(value=False, label="4-bits quantization")
destination_box = gr.Text(max_lines=1, label="destination (e.g.: my-username/bloomz-7b1.cpp)")

# Single-page form wired to `run`; queue() serializes long-running conversions.
demo = gr.Interface(
    fn=run,
    inputs=[token_box, model_box, precision_radio, quantization_box, destination_box],
    outputs=[gr.Markdown(label="output")],
    title="Convert any BLOOM-like model to be compatible with bloomz.cpp",
    description=DESCRIPTION,
    article="Check out the [bloomz.cpp](https://github.com/NouamaneTazi/bloomz.cpp) repo on GitHub",
    allow_flagging="never",
).queue()

demo.launch()