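# Spaces: Sleeping
# (The line above appears to be page-status text carried over when this file
# was copied; it is not part of the program.)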
import os
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from zipfile import ZipFile

import gradio as gr
import requests
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """
    A list of image IDs in the given batch

    Fetches the batch manifest and extracts one image ID per entry of the
    manifest's "items" list.

    Arguments:
        batch_id: Batch ID used to build the manifest URL
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever

    Returns:
        The image IDs, in manifest order

    Raises:
        requests.HTTPError: If the manifest request returns an error status
        requests.Timeout: If the server does not answer within `timeout`
    """
    response = requests.get(
        f"https://iiifintern.ra.se/arkis!{batch_id}/manifest",
        timeout=timeout,
    )
    response.raise_for_status()
    manifest = response.json()

    # Each item's "id" is split on "!"; the part after the first "!" starts
    # with the image ID, which is taken to be its first 14 characters.
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]
def download_image(url: str, dest: str, timeout: float = 30.0) -> None:
    """
    Download an image

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server (connect / each read)
            before giving up

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Nothing is written to `dest` in that case, so an error page is
            never saved under an image file name.
        requests.Timeout: If the server does not answer within `timeout`
    """
    # The context manager releases the connection even when an error occurs;
    # merely deleting the response object does not guarantee that.
    with requests.get(url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        # `response.raw` yields the bytes as sent on the wire. Ask the
        # underlying stream to undo any Content-Encoding (gzip/deflate) so
        # the saved file holds the actual image data.
        response.raw.decode_content = True

        with open(dest, "wb") as out_file:
            shutil.copyfileobj(response.raw, out_file)
def download_image_by_image_id(image_id: str):
    """
    Fetch a single image, identified by its image ID, into a batch folder

    The first eight characters of the image ID are used as the batch ID.
    A directory with that name is created if it does not exist yet, and the
    image is stored inside it as "<image_id>.jpg".
    """
    batch_dir = image_id[:8]
    os.makedirs(batch_dir, exist_ok=True)

    target_path = os.path.join(batch_dir, f"{image_id}.jpg")
    source_url = f"https://iiifintern.ra.se/arkis!{image_id}/full/max/0/default.jpg"

    download_image(source_url, target_path)
def download_batch_images(batch_id: str, workers: int = 2, progress=None):
    """
    Download all images of a batch and pack them into a zip archive

    The images are downloaded concurrently into a directory named after the
    batch, then written to "<batch_id>.zip" in the current directory.

    Arguments:
        batch_id: ID of the batch to download
        workers: Number of concurrent download threads
        progress: Optional callable invoked as `progress(fraction, desc=...)`
            to report how far the download has come

    Returns:
        File name of the created zip archive

    Raises:
        Exception: Whatever error a download raised. The first failure is
            re-raised here instead of being silently dropped, so the caller
            sees the real cause rather than a later "file not found" while
            zipping.
    """
    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)

    # Test against None explicitly: a progress tracker object may define its
    # own truthiness (e.g. via __len__), which says nothing about whether a
    # tracker was supplied.
    report = progress is not None

    if report:
        progress(0, desc=f"Starting download for {batch_id}...")

    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(download_image_by_image_id, image_id)
            for image_id in image_ids
        ]
        try:
            for done, future in enumerate(as_completed(futures), start=1):
                # result() re-raises any exception from the worker thread.
                future.result()
                if report:
                    progress(
                        done / total_images,
                        desc=f"Downloading {batch_id} ({done}/{total_images})",
                    )
        except Exception:
            # No point in fetching the rest of the batch after a failure;
            # drop downloads that have not started yet.
            for future in futures:
                future.cancel()
            raise

    # Zip the folder with downloaded images
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, 'w') as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            # Store the images flat in the archive, without the batch folder.
            zipf.write(img_path, arcname=os.path.basename(img_path))

    if report:
        progress(1, desc=f"Completed {batch_id}")

    return zip_filename
def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """
    Gradio event handler: download every listed batch and return the zips

    Arguments:
        batch_ids_input: Text with one batch ID per line. Blank lines and
            surrounding whitespace are ignored.
        progress: Gradio progress tracker (supplied by Gradio through the
            default argument)

    Returns:
        List of zip file names, one per batch, in input order

    Raises:
        gr.Error: If any batch fails. The output of this handler is a file
            component, so returning the error text would be treated as a
            file path; raising gr.Error lets Gradio show the message to the
            user instead.
    """
    # Treat a missing textbox value the same as an empty one.
    lines = (batch_ids_input or "").split("\n")
    batch_ids = [line.strip() for line in lines if line.strip()]

    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        raise gr.Error(str(e)) from e

    return zip_files  # Return the list of zip files for download
# Page layout: batch ID entry and trigger button in the left column,
# resulting zip archives in the right column.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        with gr.Column():
            batch_ids_input = gr.Textbox(
                label="Batch IDs (one per line)",
                placeholder="Enter batch IDs, one per line.",
            )
            download_button = gr.Button("Download Images")

        with gr.Column():
            output_files = gr.File(
                label="Download Zip Files",
                file_count="multiple",
            )

    # Clicking the button runs the download handler on the textbox content
    # and hands the produced zip files to the file component.
    download_button.click(
        fn=gradio_interface,
        inputs=[batch_ids_input],
        outputs=[output_files],
    )

# Enable the request queue before starting the server.
app.queue()
app.launch()