Spaces:

Wauplin
/

space_to_dataset_saver

Running

Update app.py

ca00bce 12 months ago

3.6 kB

	# Authenticate against the Hub and enable debug logging first: both must be in
	# place before the schedulers start (presumably when the app_* demo modules
	# are imported further down -- TODO confirm).
	import os
	from huggingface_hub import logging, login

	# NOTE: `logging` is `huggingface_hub.logging` here, not the stdlib module.
	login(
	    token=os.getenv("HF_TOKEN"),  # None when HF_TOKEN is not set
	    write_permission=True,
	)
	logging.set_verbosity_debug()

	# Start apps
	from pathlib import Path

	import gradio as gr

	from app_1M_image import get_demo as get_demo_1M_image
	from app_image import get_demo as get_demo_image
	from app_json import get_demo as get_demo_json

	def _get_demo_code(path: str) -> str:
	    """Return a demo module's source rewritten as a self-launching snippet.

	    The file at ``path`` is read, every ``def get_demo():`` occurrence is
	    replaced by ``with gr.Blocks() as demo:``, and a ``demo.launch()`` call is
	    appended after two newlines. The result is what the UI displays in its
	    "Source code" panels.

	    Args:
	        path: Location of the Python source file to read.

	    Returns:
	        The transformed source code as a single string.

	    Raises:
	        OSError: If the file cannot be opened or read.
	        UnicodeDecodeError: If the file content is not valid UTF-8.
	    """
	    # Decode explicitly as UTF-8: without it `read_text` falls back to the
	    # locale's preferred encoding, which can garble non-ASCII source or raise.
	    source = Path(path).read_text(encoding="utf-8")
	    standalone = source.replace("def get_demo():", "with gr.Blocks() as demo:")
	    return standalone + "\n\ndemo.launch()"


	# Page title and overview text; passed to `gr.Markdown` as the first element
	# of the UI, before any of the tabs.
	DEMO_EXPLANATION = """
	<h1 style='text-align: center; margin-bottom: 1rem'> How to persist data from a Space to a Dataset? </h1>

	This demo shows how to leverage `gradio` and `huggingface_hub` to save data from a Space to a Dataset on the Hub.
	When doing so, a few things must be taken care of: file formats, concurrent writes, name collision, number of commits,
	number of files, and more. The tabs below show different ways of implementing a "save to dataset" feature. Depending on the
	complexity and usage of your app, you might want to use one or the other.

	This Space comes as a demo for this `huggingface_hub` [guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads). Please check it out if you need more technical details.
	"""

	# Markdown shown at the top of the "JSON Dataset" tab. It ends with a
	# "## Demo" heading because the live demo is rendered right after it.
	JSON_DEMO_EXPLANATION = """
	## Use case

	- Save inputs and outputs
	- Build an annotation platform

	## Data

	Json-able only: text and numeric but no binaries.

	## Robustness

	Works with concurrent users and replicas.

	## Limitations

	If you expect millions of lines, you must split the local JSON file into multiple files to avoid getting your file tracked as LFS (5MB) on the Hub.

	## Demo
	"""

	# Markdown shown at the top of the "Image Dataset" tab. It ends with a
	# "## Demo" heading because the live demo is rendered right after it.
	IMAGE_DEMO_EXPLANATION = """
	## Use case

	Save images with metadata (caption, parameters, datetime, etc.).

	## Robustness

	Works with concurrent users and replicas.

	## Limitations

	- only 10k images/folder are supported on the Hub. If you expect more usage, you must save data in subfolders.
	- only 1M images/repo supported on the Hub. If you expect more usage, you can zip your data before uploading. See the _1M images Dataset_ demo.

	## Demo
	"""

	# Markdown shown at the top of the "1M images Dataset" tab. It ends with a
	# "## Demo" heading because the live demo is rendered right after it.
	IMAGE_1M_DEMO_EXPLANATION = """
	## Use case:

	Save 1M images with metadata (caption, parameters, datetime, etc.).

	## Robustness

	Works with concurrent users and replicas.

	## Limitations

	None.

	## Demo
	"""

	def _render_tab(title, explanation, build_demo, result_markdown, source_file):
	    """Render one tab: explanation, live demo, result link, then source code.

	    Must be called while a `gr.Blocks()` context is active. `build_demo` is the
	    zero-argument callable that creates the demo's components, and
	    `source_file` is the path handed to `_get_demo_code` for display.
	    """
	    with gr.Tab(title):
	        gr.Markdown(explanation)
	        build_demo()
	        gr.Markdown(result_markdown)
	        with gr.Accordion("Source code", open=True):
	            gr.Code(_get_demo_code(source_file), language="python")


	# Assemble the page: overview first, then one tab per persistence strategy.
	with gr.Blocks() as demo:
	    gr.Markdown(DEMO_EXPLANATION)

	    _render_tab(
	        "JSON Dataset",
	        JSON_DEMO_EXPLANATION,
	        get_demo_json,
	        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-json\n\n## Code",
	        "app_json.py",
	    )

	    _render_tab(
	        "Image Dataset",
	        IMAGE_DEMO_EXPLANATION,
	        get_demo_image,
	        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image\n\n## Code",
	        "app_image.py",
	    )

	    _render_tab(
	        "1M images Dataset",
	        IMAGE_1M_DEMO_EXPLANATION,
	        get_demo_1M_image,
	        "## Result\n\nhttps://huggingface.co/datasets/Wauplin/example-space-to-dataset-image-zip\n\n## Code",
	        "app_1M_image.py",
	    )

	# Start serving once the whole UI has been declared.
	demo.launch()