John6666 committed on
Commit
a51f990
1 Parent(s): 269ab06

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +19 -3
  2. requirements.txt +1 -1
app.py CHANGED
@@ -6,6 +6,7 @@ import os
6
  import tempfile
7
  import shutil
8
  import urllib
 
9
  from huggingface_hub import whoami, HfApi, hf_hub_download, RepoCard
10
  from huggingface_hub.utils import build_hf_headers, hf_raise_for_status
11
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
@@ -102,6 +103,16 @@ def parse_repos(s):
102
  except Exception:
103
  return []
104
 
 
 
 
 
 
 
 
 
 
 
105
  def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
106
  hf_token = oauth_token.token
107
  api = HfApi(token=hf_token)
@@ -129,15 +140,20 @@ def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oau
129
  subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo
130
 
131
  temp_dir = tempfile.mkdtemp()
132
- api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
 
133
  for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
134
  if target and target not in path: continue
 
 
 
 
135
  file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
136
  if not Path(file).exists(): continue
137
  if Path(file).is_dir(): # unused for now
138
- api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=f"{subfolder}/{path}" if subfolder else path, repo_type=repo_type, token=hf_token)
139
  elif Path(file).is_file():
140
- api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=f"{subfolder}/{path}" if subfolder else path, repo_type=repo_type, token=hf_token)
141
  if Path(file).exists(): Path(file).unlink()
142
  if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
143
  elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
 
6
  import tempfile
7
  import shutil
8
  import urllib
9
+ import hashlib
10
  from huggingface_hub import whoami, HfApi, hf_hub_download, RepoCard
11
  from huggingface_hub.utils import build_hf_headers, hf_raise_for_status
12
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
 
103
  except Exception:
104
  return []
105
 
106
def is_same_file_hf(src_repo: str, src_path: str, src_type: str, dst_repo: str, dst_path: str, dst_type: str, hf_token: str) -> bool:
    """Return True when two Hub files are the same LFS object.

    Both files are looked up through ``HfApi`` and compared by size and by
    the sha256 recorded in their LFS metadata.

    Args:
        src_repo: Repo id that holds the source file.
        src_path: Path of the source file inside ``src_repo``.
        src_type: Repo type passed as ``repo_type`` for the source lookups.
        dst_repo: Repo id that holds the destination file.
        dst_path: Path of the destination file inside ``dst_repo``.
        dst_type: Repo type passed as ``repo_type`` for the destination lookups.
        hf_token: Token used for every Hub request made here.

    Returns:
        True only if both files exist, each lookup yields exactly one entry,
        both entries carry LFS metadata, and their size and LFS sha256 match.
        False in every other case, including when either file is not
        LFS-tracked (no sha256 is available to compare).
    """
    api = HfApi(token=hf_token)

    # A missing file on either side can never be "the same file".
    if not api.file_exists(repo_id=src_repo, filename=src_path, repo_type=src_type, token=hf_token):
        return False
    if not api.file_exists(repo_id=dst_repo, filename=dst_path, repo_type=dst_type, token=hf_token):
        return False

    src_info = api.get_paths_info(repo_id=src_repo, paths=src_path, repo_type=src_type, token=hf_token)
    dst_info = api.get_paths_info(repo_id=dst_repo, paths=dst_path, repo_type=dst_type, token=hf_token)
    if not src_info or not dst_info or len(src_info) != 1 or len(dst_info) != 1:
        return False

    src_file, dst_file = src_info[0], dst_info[0]
    # Guard BOTH sides: previously only the source was checked, so a non-LFS
    # destination of equal size crashed on ``None.sha256``. ``getattr`` also
    # covers entries that have no ``lfs`` attribute at all.
    src_lfs = getattr(src_file, "lfs", None)
    dst_lfs = getattr(dst_file, "lfs", None)
    if src_lfs is None or dst_lfs is None:
        return False

    return src_file.size == dst_file.size and src_lfs.sha256 == dst_lfs.sha256

115
+
116
  def duplicate_m2o(source_repos_str, dst_repo, repo_type, private, overwrite, oauth_token: gr.OAuthToken | None, progress=gr.Progress(track_tqdm=True)):
117
  hf_token = oauth_token.token
118
  api = HfApi(token=hf_token)
 
140
  subfolder = subfolder_prefix + "/" + source_repo if subfolder_prefix else source_repo
141
 
142
  temp_dir = tempfile.mkdtemp()
143
+ if repo_type == "space": api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, space_sdk="gradio", token=hf_token)
144
+ else: api.create_repo(repo_id=dst_repo, repo_type=repo_type, private=private, exist_ok=True, token=hf_token)
145
  for path in api.list_repo_files(repo_id=source_repo, repo_type=repo_type, token=hf_token):
146
  if target and target not in path: continue
147
+ path_in_repo = f"{subfolder}/{path}" if subfolder else path
148
+ if is_same_file_hf(source_repo, path, repo_type, dst_repo, path_in_repo, repo_type, hf_token):
149
+ print(f"{dst_repo}/{path_in_repo} is already exists. Skipping.")
150
+ continue
151
  file = hf_hub_download(repo_id=source_repo, filename=path, repo_type=repo_type, local_dir=temp_dir, token=hf_token)
152
  if not Path(file).exists(): continue
153
  if Path(file).is_dir(): # unused for now
154
+ api.upload_folder(repo_id=dst_repo, folder_path=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
155
  elif Path(file).is_file():
156
+ api.upload_file(repo_id=dst_repo, path_or_fileobj=file, path_in_repo=path_in_repo, repo_type=repo_type, token=hf_token)
157
  if Path(file).exists(): Path(file).unlink()
158
  if repo_type == "dataset": repo_url = f"https://huggingface.co/datasets/{dst_repo}"
159
  elif repo_type == "space": repo_url = f"https://huggingface.co/spaces/{dst_repo}"
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
  huggingface_hub>=0.22.2
2
- gradio_huggingfacehub_search>=0.0.7
 
1
  huggingface_hub>=0.22.2
2
+ gradio_huggingfacehub_search==0.0.7