davanstrien HF staff committed on
Commit
9d04ba5
1 Parent(s): 1cb8519

Refactor code to show files and directories in the

Browse files
Files changed (1) hide show
  1. app.py +53 -10
app.py CHANGED
@@ -106,10 +106,11 @@ def list_git_repo_files_and_directories(repo_url: str, branch: str = "main"):
106
  return get_files_and_directories(response)
107
 
108
 
109
- def show_directories(url: str):
110
  with contextlib.suppress(Exception):
111
  files_and_directories = list_git_repo_files_and_directories(url)
112
  directories = files_and_directories.get("directories", [])
 
113
  print(directories)
114
  return gr.Dropdown(
115
  label="Directories",
@@ -118,34 +119,76 @@ def show_directories(url: str):
118
  visible=True,
119
  interactive=True,
120
  multiselect=True,
 
 
 
 
 
 
 
121
  )
122
 
123
 
124
- with gr.Blocks() as demo:
125
- gr.Markdown("# Migrate a GitHub Repository to Hugging Face Hub")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  gr.Markdown("URL for the GitHub repository where the dataset is currently hosted")
127
  source_github_repository = gr.Textbox(lines=1, label="Source GitHub Repository URL")
128
- gr.Markdown("OPTIONAL: If you want to upload a specific folder in the GitHub repo")
 
 
 
129
  folder_in_github_repo = gr.Dropdown(
130
  None,
131
- label="Folder in GitHub Repository to upload",
 
 
 
 
 
 
132
  allow_custom_value=True,
133
  visible=True,
134
  )
135
  source_github_repository.change(
136
- show_directories, [source_github_repository], [folder_in_github_repo]
 
 
137
  )
138
- gr.Markdown("Destination repo for your dataset")
 
139
  destination_hf_hub_repository = gr.Textbox(
140
  label="Destination Hugging Face Repository",
141
- placeholder="username/repository_name",
142
  )
 
143
  gr.Markdown(
144
  """You need to provide a token with write access to the namespace you want to upload to.
145
- You can generate or access your token from [here](https://huggingface.co/settings/token)."""
146
  )
147
  hf_token = gr.Textbox(label="Hugging Face Token", type="password")
148
- summit_btn = gr.Button()
149
  result = gr.Markdown(label="Summary", visible=True)
150
  summit_btn.click(
151
  push_to_hf,
 
106
  return get_files_and_directories(response)
107
 
108
 
109
+ def show_files_and_directories(url: str):
110
  with contextlib.suppress(Exception):
111
  files_and_directories = list_git_repo_files_and_directories(url)
112
  directories = files_and_directories.get("directories", [])
113
+ files = files_and_directories.get("files", [])
114
  print(directories)
115
  return gr.Dropdown(
116
  label="Directories",
 
119
  visible=True,
120
  interactive=True,
121
  multiselect=True,
122
+ ), gr.Dropdown(
123
+ label="Files",
124
+ choices=files,
125
+ max_choices=1,
126
+ visible=True,
127
+ interactive=True,
128
+ multiselect=True,
129
  )
130
 
131
 
132
+ html_text_app_description = """
133
+ Whilst GitHub is great for hosting code the Hugging Face Datasets Hub is a better place to host datasets.
134
+ Some of the benefits of hosting datasets on the Hugging Face Datasets Hub are:
135
+ <br>
136
+ <ul>
137
+ <li>Hosting for large datasets</li>
138
+ <li>An interactive preview of your dataset</li>
139
+ <li>Access to the dataset via many tools and libraries including; datasets, pandas, polars, dask and DuckDB</li>
140
+ </ul>
141
+
142
+ <br>
143
+ This app will help you migrate a dataset currently hosted on GitHub to the Hugging Face Datasets Hub.
144
+ """
145
+
146
+ with gr.Blocks(theme=gr.themes.Base()) as demo:
147
+ gr.HTML(
148
+ """<h1 style='text-align: center;'> GitHub to Hugging Face Hub Dataset Migration Tool</h1>
149
+ <center><i> &#x2728; Migrate a dataset in a few steps &#x2728;</i></center>"""
150
+ )
151
+ gr.HTML(
152
+ """<center> GitHub is a great place for sharing code but the Hugging Face Hub has many advantages for sharing datasets.
153
+ <br> This Space will guide you through the process of migrating a dataset from GitHub to the Hugging Face Hub. </center>"""
154
+ )
155
+ gr.Markdown("### Location of existing dataset")
156
  gr.Markdown("URL for the GitHub repository where the dataset is currently hosted")
157
  source_github_repository = gr.Textbox(lines=1, label="Source GitHub Repository URL")
158
+ gr.Markdown("### Select files and folder to migrate")
159
+ gr.Markdown(
160
+ "(Optional): select a specific folder and/or files to migrate from the GitHub repository."
161
+ )
162
  folder_in_github_repo = gr.Dropdown(
163
  None,
164
+ label="Folder in the GitHub Repository to migrate",
165
+ allow_custom_value=True,
166
+ visible=True,
167
+ )
168
+ files_in_github_repo = gr.Dropdown(
169
+ None,
170
+ label="Files in GitHub Repository to migrate",
171
  allow_custom_value=True,
172
  visible=True,
173
  )
174
  source_github_repository.change(
175
+ show_files_and_directories,
176
+ [source_github_repository],
177
+ [folder_in_github_repo, files_in_github_repo],
178
  )
179
+ gr.Markdown("### Destination for your migrated dataset")
180
+ gr.Markdown("Destination repository for your dataset on the Hugging Face Hub")
181
  destination_hf_hub_repository = gr.Textbox(
182
  label="Destination Hugging Face Repository",
183
+ placeholder="i.e. <hugging face username>/<repository_name>",
184
  )
185
+ gr.Markdown("## Authentication")
186
  gr.Markdown(
187
  """You need to provide a token with write access to the namespace you want to upload to.
188
+ You can generate/access your Hugging FAce token from [here](https://huggingface.co/settings/token)."""
189
  )
190
  hf_token = gr.Textbox(label="Hugging Face Token", type="password")
191
+ summit_btn = gr.Button("Migrate Dataset")
192
  result = gr.Markdown(label="Summary", visible=True)
193
  summit_btn.click(
194
  push_to_hf,