davanstrien HF staff committed on
Commit
b717308
1 Parent(s): 907b541

add better description

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -164,13 +164,7 @@ See an [example dataset](https://huggingface.co/datasets/davanstrien/MOH-Bethnal
164
 
165
  The resulting text chunks are stored in a dataset that can be previewed and uploaded to the Hugging Face Hub for easy sharing and access by the community.
166
  The chunking is done using `Llama-index`'s [`SentenceSplitter`](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/?h=sentencesplitter#sentencesplitter) classes.
167
-
168
- ### Usage:
169
- 1. Upload Files: Use the upload button to load file(s) for processing. A preview will be automatically generated using default settings.
170
- 2. Adjust Parameters (Optional): Customize the chunk size, overlap, and sentence splitting option according to your requirements.
171
- 3. Update Preview (Optional): Click the 'Update Preview' button to view the updated dataset based on your parameter changes.
172
- 4. Login: When ready to upload, log in to your Hugging Face account using the provided login button.
173
- 5. Upload to Hub: Specify the Hub ID, choose whether to make the dataset private, and click 'Upload to Hub'."""
174
 
175
  with gr.Blocks() as demo:
176
  state = gr.State({})
@@ -180,7 +174,9 @@ with gr.Blocks() as demo:
180
  <center><i> &#128193; From random files to a Hugging Face dataset in a few steps &#128193; </i></center>"""
181
  )
182
  gr.Markdown(description)
183
-
 
 
184
  with gr.Row():
185
  upload_button = gr.File(
186
  file_types=["text"],
@@ -189,7 +185,10 @@ with gr.Blocks() as demo:
189
  interactive=True,
190
  label="Upload Files",
191
  )
192
-
 
 
 
193
  with gr.Row():
194
  split_sentences = gr.Checkbox(True, label="Split sentences?")
195
  chunk_size = gr.Number(
@@ -206,17 +205,19 @@ with gr.Blocks() as demo:
206
  maximum=4096,
207
  step=1,
208
  )
209
-
 
 
210
  update_preview_button = gr.Button("Update Preview")
 
211
  corpus_preview_df = gr.DataFrame(label="Dataset Preview")
212
  preview_summary = gr.Markdown()
213
-
 
 
214
  with gr.Row():
215
  gr.LoginButton()
216
  with gr.Column():
217
- gr.Markdown(
218
- "To upload to the Hub, add an ID for where you want to push the dataset"
219
- )
220
  hub_id = gr.Textbox(value=None, label="Hub ID")
221
  private = gr.Checkbox(False, label="Upload dataset to a private repo?")
222
 
 
164
 
165
  The resulting text chunks are stored in a dataset that can be previewed and uploaded to the Hugging Face Hub for easy sharing and access by the community.
166
  The chunking is done using `Llama-index`'s [`SentenceSplitter`](https://docs.llamaindex.ai/en/stable/module_guides/loading/node_parsers/modules/?h=sentencesplitter#sentencesplitter) classes.
167
+ """
 
 
 
 
 
 
168
 
169
  with gr.Blocks() as demo:
170
  state = gr.State({})
 
174
  <center><i> &#128193; From random files to a Hugging Face dataset in a few steps &#128193; </i></center>"""
175
  )
176
  gr.Markdown(description)
177
+ gr.Markdown(
178
+ "### 1. Upload Files\nClick 'Upload Files' to select text file(s). A preview will generate automatically"
179
+ )
180
  with gr.Row():
181
  upload_button = gr.File(
182
  file_types=["text"],
 
185
  interactive=True,
186
  label="Upload Files",
187
  )
188
+ gr.Markdown("""
189
+ ### 2. Adjust Parameters for Chunking Text (Optional)
190
+ Customize the chunk size, overlap, and sentence splitting option according to your requirements.
191
+ """)
192
  with gr.Row():
193
  split_sentences = gr.Checkbox(True, label="Split sentences?")
194
  chunk_size = gr.Number(
 
205
  maximum=4096,
206
  step=1,
207
  )
208
+ gr.Markdown(
209
+ "### 3. Update Preview\nClick 'Update Preview' to see changes based on new parameters."
210
+ )
211
  update_preview_button = gr.Button("Update Preview")
212
+
213
  corpus_preview_df = gr.DataFrame(label="Dataset Preview")
214
  preview_summary = gr.Markdown()
215
+ gr.Markdown("""### 4. Upload to Hub
216
+ After adjusting parameters and previewing the dataset, you can upload it to the Hugging Face Hub. Make sure to sign in to your Hugging Face account. Specify the Hub ID and choose whether to make the dataset private. Click 'Upload to Hub' to complete the process.
217
+ """)
218
  with gr.Row():
219
  gr.LoginButton()
220
  with gr.Column():
 
 
 
221
  hub_id = gr.Textbox(value=None, label="Hub ID")
222
  private = gr.Checkbox(False, label="Upload dataset to a private repo?")
223