# repo_duplicator / app.py
# Hugging Face Space by osanseviero (HF staff).
# Last commit: 707bc6c "Update app.py" (file size 3.35 kB).
import gradio as gr
from huggingface_hub import create_repo, upload_file, Repository, whoami
import subprocess
import os, shutil
def _parse_destination(dst_repo):
    """Split a ``namespace/name`` repo id into ``(namespace, name)``.

    Raises:
        ValueError: if ``dst_repo`` is not exactly two non-empty parts
            separated by a single slash.
    """
    parts = (dst_repo or "").strip().split("/")
    if len(parts) != 2 or not all(parts):
        raise ValueError(
            f"Destination repository must look like 'namespace/name', got {dst_repo!r}"
        )
    return parts[0], parts[1]


def _upload_tree(local_dir, dst_repo, token, repo_type):
    """Upload every non-hidden file under ``local_dir`` to ``dst_repo``.

    The git metadata directory and dot-files are skipped; each remaining file
    is uploaded under its path relative to ``local_dir``.
    """
    # upload_file is called with repo_type=None for models, as before.
    upload_repo_type = None if repo_type == "model" else repo_type
    for root, dirs, files in os.walk(local_dir):
        # Prune only the real ".git" directory (in place, so os.walk does not
        # descend into it). A substring test on the path would also drop
        # unrelated folders such as ".github" or "site.github.io".
        dirs[:] = [d for d in dirs if d != ".git"]
        rel_dir = os.path.relpath(root, local_dir)
        for filename in files:
            if filename.startswith("."):
                continue
            local_file_path = os.path.join(root, filename)
            if rel_dir == os.curdir:
                path_in_repo = filename
            else:
                # Paths inside a Hub repo always use forward slashes.
                path_in_repo = "/".join(rel_dir.split(os.sep) + [filename])
            print("From: ", local_file_path, " to: ", path_in_repo)
            upload_file(
                path_or_fileobj=local_file_path,
                path_in_repo=path_in_repo,
                repo_id=dst_repo,
                token=token,
                repo_type=upload_repo_type,
            )


def duplicate(source_repo, dst_repo, token, repo_type):
    """Copy the files of ``source_repo`` into a newly created ``dst_repo``.

    Args:
        source_repo: id of the repository to copy from (e.g. ``user/src``).
        dst_repo: id of the repository to create, as ``namespace/name``.
        token: access token used for ``whoami``, repo creation, cloning and
            uploading.
        repo_type: ``"model"``, ``"dataset"`` or ``"space"``. Any value other
            than ``"space"``/``"dataset"`` follows the model code path.

    Returns:
        A ``(html, image_path)`` tuple: an HTML link to the new repository and
        the file name ``"sp.jpg"``.

    Raises:
        ValueError: if ``dst_repo`` is not ``namespace/name`` or
            ``source_repo`` is empty. Both are checked before any network
            call so a bad input never leaves an orphan destination repo.
    """
    # Function-scope import: tempfile is not among the file-level imports.
    import tempfile

    repo_namespace, dst_id = _parse_destination(dst_repo)
    dst_repo = f"{repo_namespace}/{dst_id}"
    source_repo = (source_repo or "").strip()
    if not source_repo:
        raise ValueError("Source repository must not be empty")

    # Creating repos has inconsistent API (https://github.com/huggingface/huggingface_hub/issues/47)
    username = whoami(token)["name"]
    org = repo_namespace if repo_namespace != username else None
    has_explicit_type = repo_type in ("space", "dataset")

    # Create the destination repo
    if has_explicit_type:
        # For some reason create_repo does not allow repo_type="model"..., even if documentation says
        # that's the default.
        url = create_repo(dst_id, token=token, organization=org, repo_type=repo_type, space_sdk="gradio", private=False)
    else:
        url = create_repo(dst_id, token=token, organization=org, private=False)

    # Clone source repo
    endpoint = "huggingface.co/"
    if has_explicit_type:
        # NOTE(review): this appends the singular "space/" / "dataset/";
        # presumably Repository normalises the URL - confirm.
        endpoint += repo_type + "/"
    full_path = f"https://{username}:{token}@{endpoint}{source_repo}"

    # Clone into a private temporary directory. It is removed even when the
    # clone or an upload fails, is never shared between requests, and its
    # location does not depend on user input.
    with tempfile.TemporaryDirectory(prefix="repo_duplicator_") as local_dir:
        if has_explicit_type:
            # Same as above
            Repository(local_dir=local_dir, clone_from=full_path, repo_type=repo_type)
        else:
            Repository(local_dir=local_dir, clone_from=full_path)
        _upload_tree(local_dir, dst_repo, token, repo_type)

    return f"Find your repo <a href='{url}' target=\"_blank\" style=\"text-decoration:underline\">here</a>", "sp.jpg"
# Input widgets, listed in the positional order of duplicate's parameters:
# source_repo, dst_repo, token, repo_type.
_source_box = gr.inputs.Textbox(placeholder="Source repository (e.g. osanseviero/src)")
_destination_box = gr.inputs.Textbox(placeholder="Destination repository (e.g. osanseviero/dst)")
_token_box = gr.inputs.Textbox(placeholder="Write access token")
_repo_type_menu = gr.inputs.Dropdown(choices=["model", "dataset", "space"])

_DESCRIPTION = "Duplicate a Hugging Face repository! You need to specify a write token obtained in https://hf.co/settings/token. This Space is a an experimental demo."
_ARTICLE = "<p>Find your write token at <a href='https://huggingface.co/settings/token' target='_blank'>token settings</a></p>"

# Wire the form to duplicate(); its two return values are shown through the
# "html" and "image" outputs, in that order.
interface = gr.Interface(
    fn=duplicate,
    inputs=[_source_box, _destination_box, _token_box, _repo_type_menu],
    outputs=["html", "image"],
    title="Duplicate your repo!",
    description=_DESCRIPTION,
    article=_ARTICLE,
    allow_flagging=False,
    live=False,
)

# Start the app with the queue enabled.
interface.launch(enable_queue=True)