"""Gradio app that converts an uploaded pickle/joblib file to the skops format."""

import os
import pickle
import tempfile
import warnings
from io import BytesIO
from pathlib import Path
from uuid import uuid4

import gradio as gr
import joblib
from huggingface_hub import upload_file
from skops import io as sio

title = "skops converter"

# Markdown shown at the top of the page.
desc = """
# Pickle to skops converter

This space converts your pickle files to skops format. You can read more on the
skops format [here](
https://skops.readthedocs.io/en/stable/persistence.html).

You can use `skops.io.dump(joblib.load(in_file), out_file)` to do the
conversion yourself, where `in_file` is your source pickle file and `out_file`
is where you want to save the skops file. But only do that **if you trust the
source of the pickle file**.

You can then use `skops.io.load(skops_file, trusted=unknown_types)` to load the
file, where `skops_file` is the converted skops format file, and the
`unknown_types` is what you see in the "Unknown Types" box bellow. You can also
locally reproduce this list using
`skops.io.get_untrusted_types(file=skops_file)`. You should only load a `skops`
file that you trust all the types included in the `unknown_types` list.

## Requirements

This space assumes you have used the latest `joblib` and `scikit-learn`
versions installed on your environment to create the pickle file.

## Reporting issues

If you encounter an issue, please open an issue on the project's repository
on the [issue tracker](
https://github.com/skops-dev/skops/issues/new?title=CONVERSION+error+from+hf.space&body=Paste+the+error+message+and+a+link+to+your+pickle+file+here+please)
"""


def _load_pickle(path):
    """Return the object stored at *path*, trying joblib first, then pickle."""
    # SECURITY: unpickling executes arbitrary code embedded in the file. The
    # uploads handled here are untrusted, so this must only ever run in an
    # isolated, disposable environment.
    try:
        return joblib.load(path)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        with open(path, "rb") as f:
            return pickle.load(f)


def _skops_filename(name):
    """Return *name* with its last extension replaced by ``.skops``."""
    stem, _, _ = name.rpartition(".")
    # `stem` is empty when the name has no dot at all, or when its only dot is
    # the leading one (e.g. ".model"); keep the whole name in both cases so the
    # result is never the bare ".skops".
    return f"{stem or name}.skops"


def convert(file, store):
    """Convert an uploaded pickle file to a skops file.

    Parameters
    ----------
    file
        Uploaded-file object; only its ``name`` attribute (the path of the
        file on disk) is used.
    store
        When truthy, a copy of the uploaded file is first pushed to the
        ``scikit-learn/pickle-to-skops`` dataset repository, authenticated
        with the ``HF_TOKEN`` environment variable.

    Returns
    -------
    tuple
        ``(out_file, unknown_types, msg)``: the path of the converted file in
        a fresh temporary directory, the result of
        ``sio.get_untrusted_types`` for that file, and the ``repr`` of every
        warning recorded during the conversion joined by newlines (empty
        string when there were none). If any step raises an ``Exception``
        (including the optional upload), ``(None, None, repr(error))`` is
        returned instead.
    """
    try:
        with warnings.catch_warnings(record=True) as record:
            in_file = Path(file.name)
            if store:
                upload_file(
                    path_or_fileobj=str(in_file),
                    # A random prefix keeps uploads with equal names apart.
                    path_in_repo=f"{uuid4()}/{in_file.name}",
                    repo_id="scikit-learn/pickle-to-skops",
                    repo_type="dataset",
                    token=os.environ["HF_TOKEN"],
                )

            obj = _load_pickle(in_file)

            out_dir = tempfile.mkdtemp(prefix="gradio-convert-")
            out_file = Path(out_dir) / _skops_filename(in_file.name)
            sio.dump(obj, out_file)
            unknown_types = sio.get_untrusted_types(file=out_file)
            # Joining an empty record list yields "", so no guard is needed.
            msg = "\n".join(repr(w.message) for w in record)
    except Exception as e:
        # Report the failure in the "Errors and Warnings" box rather than
        # letting the callback raise.
        return None, None, repr(e)

    return out_file, unknown_types, msg


with gr.Blocks(title=title) as iface:
    gr.Markdown(desc)
    store = gr.Checkbox(
        label=(
            "Store a copy: if you leave this box checked, we store a copy of your"
            " pickle file in a private place, only used for us to find issues and"
            " improve the skops format. Please uncheck this box if your pickle file"
            " includes any personal or sensitive data."
        ),
        value=True,
    )
    upload_button = gr.UploadButton(
        "Click to Upload a File",
        file_types=None,
        file_count="single",
    )
    file_output = gr.File(label="Converted File")
    # The three outputs line up with the tuple returned by `convert`.
    upload_button.upload(
        convert,
        [upload_button, store],
        [
            file_output,
            gr.Text(label="Unknown Types"),
            gr.Text(label="Errors and Warnings"),
        ],
        api_name="upload-file",
    )

iface.launch(debug=True)