"""Gradio app (Hugging Face Space) that converts a public BLOOM-like model
from the Hub into the bloomz.cpp GGML format and opens a pull request with
the converted weights using the user's token."""

from pathlib import Path
from tempfile import TemporaryDirectory

import gradio as gr
from huggingface_hub import HfApi, Repository  # Repository kept: part of the file's original imports
from huggingface_hub.utils import RepositoryNotFoundError

from convert import convert


def run(
    token: str, model_id: str, precision: str, quantization: bool, destination: str
) -> str:
    """Convert ``model_id`` with bloomz.cpp and open a PR on ``destination``.

    Args:
        token: HF access token used to open the PR.
        model_id: public Hub repo to convert (e.g. ``bigscience/bloomz-7b1``).
        precision: ``"FP16"`` or ``"FP32"`` (forwarded to ``convert``).
        quantization: opt-in 4-bit quantization (forwarded to ``convert``).
        destination: repo to open the PR on; defaults to ``model_id`` if empty.

    Returns:
        A Markdown string describing success or failure, rendered by the
        Gradio ``Markdown`` output component.
    """
    # Pasted tokens/ids frequently carry stray whitespace (e.g. a trailing
    # newline from the clipboard); strip it up front so it doesn't turn into
    # a confusing authentication or repo-not-found error downstream.
    token = token.strip()
    model_id = model_id.strip()
    destination = destination.strip()

    if not token or not model_id:
        return """
### Invalid input 🐞

Please fill a token and model_id.
"""
    if not destination:
        # Default behavior: open the PR on the source repository itself.
        destination = model_id

    api = HfApi(token=token)
    try:
        # TODO: make a PR to bloomz.cpp to be able to pass a token
        # token=False forces an anonymous call: only public repos are accessible.
        api.model_info(repo_id=model_id, token=False)
    except RepositoryNotFoundError:
        return f"""
### Error 😢😢😢

Repository {model_id} not found. Only public models are convertible at the moment.
"""

    try:
        # The cache folder (downloaded weights + converted file) is deleted
        # as soon as the upload finishes.
        with TemporaryDirectory() as cache_folder:
            model_path = convert(
                cache_folder=Path(cache_folder),
                model_id=model_id,
                precision=precision,
                quantization=quantization,
            )
            print("[model_path]", model_path)
            commit_info = api.upload_file(
                repo_id=destination,
                path_or_fileobj=model_path,
                path_in_repo=model_path.name,
                create_pr=True,
                commit_message=f"Add {model_path.name} from bloomz.cpp converter.",
            )
        return f"""
### Success 🔥

Yay! This model was successfully converted and a PR was open using your token, here:

# [{commit_info.pr_url}]({commit_info.pr_url})
"""
    except Exception as e:
        # Top-level boundary: surface any conversion/upload failure in the UI
        # instead of crashing the Space.
        return f"""
### Error 😢😢😢

{e}
"""


DESCRIPTION = """
The steps are the following:

- Paste your HF token. You can create one in your [settings page](https://huggingface.co/settings/tokens).
- Input a model id from the Hub. This model must be public.
- Choose which precision you want to use (default to FP16).
- (optional) Opt-in for 4-bit quantization.
- (optional) By default a PR to the initial repo will be created. You can choose a different destination repo if you want. The destination repo must exist.
- Click Submit: That's it! You'll get feedback if it works or not, and if it worked, you'll get the URL of the opened PR 🔥
"""

demo = gr.Interface(
    title="Convert any BLOOM-like model to be compatible with bloomz.cpp",
    description=DESCRIPTION,
    allow_flagging="never",
    article="Check out the [bloomz.cpp](https://github.com/NouamaneTazi/bloomz.cpp) repo on GitHub",
    inputs=[
        gr.Text(max_lines=1, label="your hf_token"),
        gr.Text(max_lines=1, label="model_id (e.g.: bigscience/bloomz-7b1)"),
        gr.Radio(choices=["FP16", "FP32"], label="Precision", value="FP16"),
        gr.Checkbox(value=False, label="4-bits quantization"),
        gr.Text(max_lines=1, label="destination (e.g.: my-username/bloomz-7b1.cpp)"),
    ],
    outputs=[gr.Markdown(label="output")],
    fn=run,
).queue()

# HF Spaces runs this module as a script; launch at import time as the
# original file did.
demo.launch()